Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make leela2onnx Conv-nodes compatible with onnx2pytorch #1924

Merged
merged 9 commits into from
Nov 14, 2023
5 changes: 5 additions & 0 deletions src/lc0ctl/leela2onnx.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ const OptionId kOutputValue{
"ONNX name to use for value policy head output node."};
const OptionId kOutputMlh{"mlh-head-name", "MlhHeadName",
"ONNX name to use for the MLH head output node."};
// Boolean flag (registered with a default of false) asking the converter to
// emit only layer definitions that onnx2pytorch supports.
const OptionId kOnnxToPytorch{"onnx2pytorch", "Onnx2Pytorch",
"Only use layer definitions supported by onnx2pytorch."};

bool ProcessParameters(OptionsParser* options) {
options->Add<StringOption>(kInputFilenameId);
Expand All @@ -63,6 +65,7 @@ bool ProcessParameters(OptionsParser* options) {
options->Add<StringOption>(kOutputWdl) = "/output/wdl";
options->Add<StringOption>(kOutputValue) = "/output/value";
options->Add<StringOption>(kOutputMlh) = "/output/mlh";
options->Add<BoolOption>(kOnnxToPytorch) = false;
if (!options->ProcessAllFlags()) return false;

const OptionsDict& dict = options->GetOptionsDict();
Expand Down Expand Up @@ -94,6 +97,8 @@ void ConvertLeelaToOnnx() {
onnx_options.output_wdl = dict.Get<std::string>(kOutputWdl);
onnx_options.output_value = dict.Get<std::string>(kOutputValue);
onnx_options.output_wdl = dict.Get<std::string>(kOutputWdl);
// onnx2pytorch only needs an alternate layernorm-implementation
onnx_options.alt_ln = dict.Get<bool>(kOnnxToPytorch);
weights_file = ConvertWeightsToOnnx(weights_file, onnx_options);
}

Expand Down
38 changes: 38 additions & 0 deletions src/neural/onnx/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,12 @@ std::string OnnxBuilder::Conv(const std::string& name,
const OnnxConst& kernel_weights,
const OnnxConst& bias_weights, int pads) {
auto* node = model_.mutable_graph()->add_node();
auto shape = kernel_weights.GetDimensions().back();
auto out = PopulateStdNodeFields(node, name, input_name, "Conv");
node->add_input(AddInitializer(name + "/w/kernel", kernel_weights));
node->add_input(AddInitializer(name + "/w/bias", bias_weights));
AddIntsAttribute(node, "pads", {pads, pads, pads, pads});
AddIntsAttribute(node, "kernel_shape", {shape, shape});
return out;
}

Expand Down Expand Up @@ -438,4 +440,40 @@ std::string OnnxBuilder::Mish(const std::string& name,
return PopulateStdNodeFields(node, name, input, "Mish");
}

// Appends a new "Sqrt" node to the model graph, wired to `input`, and returns
// the value PopulateStdNodeFields yields for it (presumably the node's output
// name -- confirm against PopulateStdNodeFields).
std::string OnnxBuilder::Sqrt(const std::string& name,
                              const std::string& input) {
  return PopulateStdNodeFields(model_.mutable_graph()->add_node(), name, input,
                               "Sqrt");
}

// Appends a new "Reciprocal" node to the model graph, wired to `input`, and
// returns the value PopulateStdNodeFields yields for it (presumably the node's
// output name -- confirm against PopulateStdNodeFields).
std::string OnnxBuilder::Reciprocal(const std::string& name,
                                    const std::string& input) {
  return PopulateStdNodeFields(model_.mutable_graph()->add_node(), name, input,
                               "Reciprocal");
}

// Appends a new "Cast" node to the model graph. The requested target element
// type is stored in the node's "to" attribute. Returns the value produced by
// PopulateStdNodeFields for the new node.
std::string OnnxBuilder::Cast(const std::string& name, const std::string& input,
                              pblczero::TensorProto::DataType type) {
  auto* cast_node = model_.mutable_graph()->add_node();
  auto result = PopulateStdNodeFields(cast_node, name, input, "Cast");
  // "to" carries the destination data type as an integer attribute.
  AddIntAttribute(cast_node, "to", type);
  return result;
}

// Appends a new "ReduceMean" node to the model graph that reduces `input` over
// `axes`. How the axes are attached depends on the builder's opset:
//   * opset_ < 18: as the integer-list attribute "axes";
//   * otherwise:   as a second node input backed by an int64 initializer named
//                  `name + "/axes"`.
// Returns the value produced by PopulateStdNodeFields for the new node.
std::string OnnxBuilder::ReduceMean(const std::string& name,
                                    const std::string& input,
                                    std::initializer_list<int> axes) {
  auto* reduce_node = model_.mutable_graph()->add_node();
  auto result = PopulateStdNodeFields(reduce_node, name, input, "ReduceMean");
  if (opset_ >= 18) {
    // Axes travel as a 1-D int64 tensor with one entry per axis.
    const int axes_count = static_cast<int>(axes.size());
    reduce_node->add_input(AddInitializer(
        name + "/axes",
        Int64OnnxConst(std::vector<int64_t>(axes.begin(), axes.end()),
                       {axes_count})));
    return result;
  }
  AddIntsAttribute(reduce_node, "axes", axes);
  return result;
}

} // namespace lczero
8 changes: 7 additions & 1 deletion src/neural/onnx/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class OnnxBuilder {
std::string Add(const std::string& name, const std::string& input1,
const std::string& input2);
std::string Add(const std::string& name, const std::string& input1,
const OnnxConst&);
const OnnxConst& input2);
std::string GlobalAveragePool(const std::string& name,
const std::string& input);
std::string Squeeze(const std::string& name, const std::string& input,
Expand Down Expand Up @@ -120,6 +120,12 @@ class OnnxBuilder {
std::string Where(const std::string& name, const std::string& input1,
const std::string& input2, const std::string& input3);
std::string Mish(const std::string& name, const std::string& input);
// Adds a "Sqrt" node taking `input`.
std::string Sqrt(const std::string& name, const std::string& input);
// Adds a "Reciprocal" node taking `input`.
std::string Reciprocal(const std::string& name, const std::string& input);
// Adds a "Cast" node taking `input`; `type` is written to the node's "to"
// attribute.
std::string Cast(const std::string& name, const std::string& input,
pblczero::TensorProto::DataType type);
// Adds a "ReduceMean" node over `axes`. The axes are emitted as an attribute
// for opsets below 18 and as an initializer-backed input otherwise.
std::string ReduceMean(const std::string& name, const std::string& input,
std::initializer_list<int> axes);
// Returns ONNX model as protobuf.
const pblczero::ModelProto& as_proto() const { return model_; }
// Returns serialized model.
Expand Down
67 changes: 48 additions & 19 deletions src/neural/onnx/converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@ class Converter {
const std::string& encoder_in,
const std::string& name);

// Emits a layer normalization of `input` scaled by `gammas` and shifted by
// `betas`. Uses the builder's LayerNormalization node (axis 1) unless
// options_.alt_ln is set, in which case the normalization is assembled from
// Cast/ReduceMean/Sub/Mul/Add/Sqrt/Reciprocal nodes instead.
std::string MakeLayerNorm(OnnxBuilder* builder, const std::string& input,
const std::string& name,
const lczero::OnnxConst& gammas,
const lczero::OnnxConst& betas, float eps = 1e-6);

std::string MakeEncoderLayer(OnnxBuilder* builder,
const LegacyWeights::EncoderLayer& layer,
int embedding_size, int heads,
Expand Down Expand Up @@ -320,10 +325,10 @@ std::string Converter::MakeSmolgen(OnnxBuilder* builder,
name + "/smolgen/dense1/b", flow,
*GetWeghtsConverter(layer.mha.smolgen.dense1_b, {smolgen_hidden_sz}));
flow = MakeActivation(builder, flow, name + "/smolgen/dense1", activation);
flow = builder->LayerNormalization(
name + "/smolgen/ln1", flow,
flow = MakeLayerNorm(
builder, flow, name + "/smolgen/ln1",
*GetWeghtsConverter(layer.mha.smolgen.ln1_gammas, {smolgen_hidden_sz}),
*GetWeghtsConverter(layer.mha.smolgen.ln1_betas, {smolgen_hidden_sz}), 1,
*GetWeghtsConverter(layer.mha.smolgen.ln1_betas, {smolgen_hidden_sz}),
1e-3);
flow = builder->MatMul(
name + "/smolgen/dense2/w", flow,
Expand All @@ -333,13 +338,12 @@ std::string Converter::MakeSmolgen(OnnxBuilder* builder,
*GetWeghtsConverter(layer.mha.smolgen.dense2_b,
{smolgen_gen_sz * heads}));
flow = MakeActivation(builder, flow, name + "/smolgen/dense2", activation);
flow = builder->LayerNormalization(
name + "/smolgen/ln2", flow,
*GetWeghtsConverter(layer.mha.smolgen.ln2_gammas,
{smolgen_gen_sz * heads}),
*GetWeghtsConverter(layer.mha.smolgen.ln2_betas,
{smolgen_gen_sz * heads}),
1, 1e-3);
flow = MakeLayerNorm(builder, flow, name + "/smolgen/ln2",
*GetWeghtsConverter(layer.mha.smolgen.ln2_gammas,
{smolgen_gen_sz * heads}),
*GetWeghtsConverter(layer.mha.smolgen.ln2_betas,
{smolgen_gen_sz * heads}),
1e-3);
flow =
builder->Reshape(name + "/smolgen/gen_from/reshape", flow,
builder->AddInitializer(
Expand All @@ -354,6 +358,33 @@ std::string Converter::MakeSmolgen(OnnxBuilder* builder,
return flow;
}

// Builds a layer normalization of `input`, scaled by `gammas` and shifted by
// `betas`, with `eps` added to the variance before the square root.
//
// With options_.alt_ln unset, a single LayerNormalization node (axis 1) is
// emitted. With it set, the same computation is spelled out from elementary
// nodes: the statistics are computed after a cast to FLOAT, and the normalized
// value is cast back to GetDataType() before gamma/beta are applied.
//
// Nodes are appended in the same order as the data flows through them.
std::string Converter::MakeLayerNorm(OnnxBuilder* builder,
                                     const std::string& input,
                                     const std::string& name,
                                     const lczero::OnnxConst& gammas,
                                     const lczero::OnnxConst& betas,
                                     float eps) {
  if (options_.alt_ln) {
    // Compute mean and variance on a FLOAT copy of the input.
    const auto as_float =
        builder->Cast(name + "/to_float", input, pblczero::TensorProto::FLOAT);
    const auto mean = builder->ReduceMean(name + "/mean", as_float, {1});
    const auto centered = builder->Sub(name + "/centered", as_float, mean);
    const auto squared = builder->Mul(name + "/squared", centered, centered);
    const auto variance = builder->ReduceMean(name + "/var", squared, {1});
    // The explicit cast selects the Add overload that takes a constant.
    const auto variance_eps =
        builder->Add(name + "/var_eps", variance,
                     static_cast<const OnnxConst&>(FloatOnnxConst({eps}, {1})));
    const auto std_dev = builder->Sqrt(name + "/std", variance_eps);
    const auto inv_std = builder->Reciprocal(name + "/inv_std", std_dev);
    const auto normalized =
        builder->Mul(name + "/normalized", centered, inv_std);
    // Return to the network's data type before the affine transform.
    const auto typed =
        builder->Cast(name + "/to_data_type", normalized, GetDataType());
    const auto scaled = builder->Mul(name + "/gammas", typed, gammas);
    return builder->Add(name + "/betas", scaled, betas);
  }
  return builder->LayerNormalization(name, input, gammas, betas, 1, eps);
}

std::string Converter::MakeEncoderLayer(
OnnxBuilder* builder, const LegacyWeights::EncoderLayer& layer,
int embedding_size, int heads, const std::string& encoder_in,
Expand Down Expand Up @@ -430,11 +461,10 @@ std::string Converter::MakeEncoderLayer(
alpha_in = encoder_in;
}
flow = builder->Add(name + "/mha/out/skip", flow, alpha_in);

auto ffn_in = builder->LayerNormalization(
name + "/ln1", flow,
*GetWeghtsConverter(layer.ln1_gammas, {embedding_size}),
*GetWeghtsConverter(layer.ln1_betas, {embedding_size}), 1);
auto ffn_in =
MakeLayerNorm(builder, flow, name + "/ln1",
*GetWeghtsConverter(layer.ln1_gammas, {embedding_size}),
*GetWeghtsConverter(layer.ln1_betas, {embedding_size}));
const int dff_size = layer.ffn.dense1_b.size();
flow =
builder->MatMul(name + "/ffn/dense1/w", ffn_in,
Expand Down Expand Up @@ -462,10 +492,9 @@ std::string Converter::MakeEncoderLayer(
alpha_ffn_in = ffn_in;
}
flow = builder->Add(name + "/ffn/skip", flow, alpha_ffn_in);
flow = builder->LayerNormalization(
name + "/ln2", flow,
*GetWeghtsConverter(layer.ln2_gammas, {embedding_size}),
*GetWeghtsConverter(layer.ln2_betas, {embedding_size}), 1);
flow = MakeLayerNorm(builder, flow, name + "/ln2",
*GetWeghtsConverter(layer.ln2_gammas, {embedding_size}),
*GetWeghtsConverter(layer.ln2_betas, {embedding_size}));
return flow;
}

Expand Down
1 change: 1 addition & 0 deletions src/neural/onnx/converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct WeightsToOnnxConverterOptions {
int batch_size = -1;
int opset = 17;
bool alt_mish = false;
bool alt_ln = false;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe rename it to something slightly less cryptic? (alternative_layer_normalization or alt_layer_norm or at the very least add a comment). And I'd say the same with the command line option.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comes from my original code that was only meant for tests...

Copy link
Contributor Author

@patrik-ha patrik-ha Oct 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, fixed (should have considered that after using it)

};

// Converts "classical" weights file to weights file with embedded ONNX model.
Expand Down
1 change: 1 addition & 0 deletions src/neural/onnx/network_onnx.cc
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ std::unique_ptr<Network> MakeOnnxNetwork(const std::optional<WeightsFile>& w,
converter_options.opset = opts.GetOrDefault<int>("opset", 17);
converter_options.alt_mish = opts.GetOrDefault<bool>(
"alt_mish", kProvider == OnnxProvider::CPU ? true : false);
converter_options.alt_ln = opts.GetOrDefault<bool>("alt_ln", true);
converter_options.data_type_ =
fp16 ? WeightsToOnnxConverterOptions::DataType::kFloat16
: WeightsToOnnxConverterOptions::DataType::kFloat32;
Expand Down