diff --git a/bom/build.gradle.kts b/bom/build.gradle.kts index 3128d7215137..bdf6ee4fef22 100644 --- a/bom/build.gradle.kts +++ b/bom/build.gradle.kts @@ -111,6 +111,7 @@ dependencies { apiv("org.apache.commons:commons-pool2") apiv("org.apache.commons:commons-collections4") apiv("org.apache.commons:commons-text") + apiv("org.apache.flink:flink-table-code-splitter") apiv("org.apache.geode:geode-core") apiv("org.apache.hadoop:hadoop-client", "hadoop") apiv("org.apache.hadoop:hadoop-common", "hadoop") diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableInterpretable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableInterpretable.java index 5f32ab1c7102..2cc845c92022 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableInterpretable.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableInterpretable.java @@ -111,7 +111,9 @@ public static Bindable toBindable(Map parameters, parameters); final ClassDeclaration expr = relImplementor.implementRoot(rel, prefer); - String s = Expressions.toString(expr.memberDeclarations, "\n", false); + String s = + Expressions.toString(expr.memberDeclarations, "\n", false, + CalciteSystemProperty.MAX_METHOD_LENGTH_IN_CHARS_BEFORE_SPLITTING.value()); if (CalciteSystemProperty.DEBUG.value()) { Util.debugCode(System.out, s); diff --git a/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java b/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java index ebd7a5c02d10..288befb635bd 100644 --- a/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java +++ b/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java @@ -16,6 +16,8 @@ */ package org.apache.calcite.config; +import org.apache.calcite.linq4j.tree.Expressions; + import com.google.common.collect.ImmutableSet; import org.checkerframework.checker.nullness.qual.Nullable; @@ -420,6 +422,23 @@ public final class CalciteSystemProperty { public static final CalciteSystemProperty FUNCTION_LEVEL_CACHE_MAX_SIZE = intProperty("calcite.function.cache.maxSize", 0, v -> v >= 0); + /** + * The length of code in characters (inclusive) before automatic method splitting is enabled. + * If the length of a generated is equal or less than this value, automatic method splitting is + * disabled. + * + *

A value of {@link Expressions#DISABLE_METHOD_SPLITTING}, the default, disables automatic + * method splitting. + * + *

Some queries can generate methods exceeding the default JIT limit of 4000 characters per + * method. Use this setting to automatically detect and split methods larger than the specified + * limit. + */ + public static final CalciteSystemProperty MAX_METHOD_LENGTH_IN_CHARS_BEFORE_SPLITTING = + intProperty("calcite.linq.method_splitting_character_limit", + Expressions.DISABLE_METHOD_SPLITTING, + v -> v == Expressions.DISABLE_METHOD_SPLITTING || v >= 0); + private static CalciteSystemProperty booleanProperty(String key, boolean defaultValue) { // Note that "" -> true (convenient for command-lines flags like '-Dflag') diff --git a/core/src/main/java/org/apache/calcite/interpreter/JaninoRexCompiler.java b/core/src/main/java/org/apache/calcite/interpreter/JaninoRexCompiler.java index bca4f85ef501..bfb436ebe7b0 100644 --- a/core/src/main/java/org/apache/calcite/interpreter/JaninoRexCompiler.java +++ b/core/src/main/java/org/apache/calcite/interpreter/JaninoRexCompiler.java @@ -186,7 +186,9 @@ static Scalar.Producer baz(ParameterExpression context_, final ClassDeclaration classDeclaration = Expressions.classDecl(Modifier.PUBLIC, "Buzz", null, ImmutableList.of(Scalar.Producer.class), declarations); - String s = Expressions.toString(declarations, "\n", false); + String s = + Expressions.toString(declarations, "\n", false, + CalciteSystemProperty.MAX_METHOD_LENGTH_IN_CHARS_BEFORE_SPLITTING.value()); if (CalciteSystemProperty.DEBUG.value()) { Util.debugCode(System.out, s); } diff --git a/core/src/main/java/org/apache/calcite/rex/RexExecutorImpl.java b/core/src/main/java/org/apache/calcite/rex/RexExecutorImpl.java index b32399b02f19..acd96c114cf6 100644 --- a/core/src/main/java/org/apache/calcite/rex/RexExecutorImpl.java +++ b/core/src/main/java/org/apache/calcite/rex/RexExecutorImpl.java @@ -100,7 +100,9 @@ private static String compile(RexBuilder rexBuilder, List constExps, Expressions.methodDecl(Modifier.PUBLIC, Object[].class, BuiltInMethod.FUNCTION1_APPLY.method.getName(), ImmutableList.of(root0_), blockBuilder.toBlock()); - String code = Expressions.toString(methodDecl); + String code = + Expressions.toString(methodDecl, + CalciteSystemProperty.MAX_METHOD_LENGTH_IN_CHARS_BEFORE_SPLITTING.value()); if (CalciteSystemProperty.DEBUG.value()) { Util.debugCode(System.out, code); } diff --git a/gradle.properties b/gradle.properties index a0004d1fc812..491411b9a039 100644 --- a/gradle.properties +++ b/gradle.properties @@ -102,6 +102,7 @@ dropwizard-metrics.version=4.0.5 # do not upgrade this, new versions are Category X license. elasticsearch.version=7.10.2 embedded-redis.version=0.6 +flink-table-code-splitter.version=1.20.0 jts-core.version=1.19.0 jts-io-common.version=1.19.0 proj4j.version=1.2.2 diff --git a/innodb/src/main/java/org/apache/calcite/adapter/innodb/InnodbToEnumerableConverter.java b/innodb/src/main/java/org/apache/calcite/adapter/innodb/InnodbToEnumerableConverter.java index 858ddbb3a0d3..f6d57e79eeff 100644 --- a/innodb/src/main/java/org/apache/calcite/adapter/innodb/InnodbToEnumerableConverter.java +++ b/innodb/src/main/java/org/apache/calcite/adapter/innodb/InnodbToEnumerableConverter.java @@ -150,7 +150,9 @@ static List innodbFieldNames(final RelDataType rowType) { InnodbMethod.INNODB_QUERYABLE_QUERY.method, fields, selectFields, cond, ascOrder)); if (CalciteSystemProperty.DEBUG.value()) { - System.out.println("Innodb: " + Expressions.toString(enumerable)); + System.out.println( + "Innodb: " + Expressions.toString(enumerable, + CalciteSystemProperty.MAX_METHOD_LENGTH_IN_CHARS_BEFORE_SPLITTING.value())); } list.add(Expressions.return_(null, enumerable)); return implementor.result(physType, list.toBlock()); diff --git a/linq4j/build.gradle.kts b/linq4j/build.gradle.kts index 68770959b535..d678ca45aeaa 100644 --- a/linq4j/build.gradle.kts +++ b/linq4j/build.gradle.kts @@ -20,4 +20,8 @@ dependencies { implementation("com.google.guava:guava") implementation("org.apache.calcite.avatica:avatica-core") + // Note that only the flink-table-code-splitter module (and its dependencies) is imported + // from Flink. Other Flink modules have Calcite as a dependency and can introduce circular + // dependencies. + implementation("org.apache.flink:flink-table-code-splitter") } diff --git a/linq4j/src/main/java/org/apache/calcite/linq4j/tree/AbstractNode.java b/linq4j/src/main/java/org/apache/calcite/linq4j/tree/AbstractNode.java index a60241123862..ed33789e2b63 100644 --- a/linq4j/src/main/java/org/apache/calcite/linq4j/tree/AbstractNode.java +++ b/linq4j/src/main/java/org/apache/calcite/linq4j/tree/AbstractNode.java @@ -51,7 +51,8 @@ public Type getType() { } @Override public String toString() { - ExpressionWriter writer = new ExpressionWriter(true); + // Use the JVM limit for method size (4K) as the method splitting threshold. + ExpressionWriter writer = new ExpressionWriter(); accept(writer, 0, 0); return writer.toString(); } diff --git a/linq4j/src/main/java/org/apache/calcite/linq4j/tree/ClassDeclaration.java b/linq4j/src/main/java/org/apache/calcite/linq4j/tree/ClassDeclaration.java index 184961e354f1..93998e4bc411 100644 --- a/linq4j/src/main/java/org/apache/calcite/linq4j/tree/ClassDeclaration.java +++ b/linq4j/src/main/java/org/apache/calcite/linq4j/tree/ClassDeclaration.java @@ -16,6 +16,9 @@ */ package org.apache.calcite.linq4j.tree; +import org.apache.commons.lang3.StringUtils; +import org.apache.flink.table.codesplit.JavaCodeSplitter; + import org.checkerframework.checker.nullness.qual.Nullable; import java.lang.reflect.Modifier; @@ -46,19 +49,35 @@ public ClassDeclaration(int modifier, String name, @Nullable Type extended, } @Override public void accept(ExpressionWriter writer) { + // Conditionally serialize the class declaration directly to the supplied writer if method + // splitting is disabled or to a temporary writer that will generate code for the method that + // can be split before being serialized to the supplied writer. + final ExpressionWriter writerForClassWithoutSplitting = writer.usesMethodSplitting() + ? writer.duplicateState() : writer; + String modifiers = Modifier.toString(modifier); - writer.append(modifiers); + writerForClassWithoutSplitting.append(modifiers); if (!modifiers.isEmpty()) { - writer.append(' '); + writerForClassWithoutSplitting.append(' '); } - writer.append(classClass).append(' ').append(name); + writerForClassWithoutSplitting.append(classClass).append(' ').append(name); if (extended != null) { - writer.append(" extends ").append(extended); + writerForClassWithoutSplitting.append(" extends ").append(extended); } if (!implemented.isEmpty()) { - writer.list(" implements ", ", ", "", implemented); + writerForClassWithoutSplitting.list(" implements ", ", ", "", implemented); + } + writerForClassWithoutSplitting.list(" {\n", "", "}", memberDeclarations); + + if (writer.usesMethodSplitting()) { + final int defaultMaxMembersGeneratedCode = 10000; + + writer.append( + StringUtils.stripStart( + JavaCodeSplitter.split(writerForClassWithoutSplitting.toString(), + writer.getMaxMethodLengthInChars(), defaultMaxMembersGeneratedCode), + " ")); } - writer.list(" {\n", "", "}", memberDeclarations); writer.newlineAndIndent(); } diff --git a/linq4j/src/main/java/org/apache/calcite/linq4j/tree/ExpressionWriter.java b/linq4j/src/main/java/org/apache/calcite/linq4j/tree/ExpressionWriter.java index 8bc640bac6e4..fb4d649a3079 100644 --- a/linq4j/src/main/java/org/apache/calcite/linq4j/tree/ExpressionWriter.java +++ b/linq4j/src/main/java/org/apache/calcite/linq4j/tree/ExpressionWriter.java @@ -32,15 +32,31 @@ class ExpressionWriter { private final Spacer spacer = new Spacer(0); private final StringBuilder buf = new StringBuilder(); - private boolean indentPending; private final boolean generics; + /** The maximum number of characters (inclusive) in a method before method splitting is + * enabled. A value of {@link Expressions#DISABLE_METHOD_SPLITTING} indicates that there is no + * limit. + */ + private final int maxMethodLengthInChars; + private boolean indentPending; + ExpressionWriter() { - this(true); + this(true, Expressions.DISABLE_METHOD_SPLITTING); } - ExpressionWriter(boolean generics) { + /** + * Creates an ExpressionWriter that can optionally emit generics and split methods. + * + * @param generics Indicates if generic type arguments should be written. + * @param maxMethodLengthInChars The maximum number of characters (inclusive) in a generated + * method before the method is split up into smaller methods. + * A value of {@link Expressions#DISABLE_METHOD_SPLITTING} + * indicates that there is no limit to method size. + */ + ExpressionWriter(boolean generics, int maxMethodLengthInChars) { this.generics = generics; + this.maxMethodLengthInChars = maxMethodLengthInChars; } public void write(Node expression) { @@ -73,6 +89,14 @@ public boolean requireParentheses(Expression expression, int lprec, return true; } + public boolean usesMethodSplitting() { + return maxMethodLengthInChars != Expressions.DISABLE_METHOD_SPLITTING; + } + + public int getMaxMethodLengthInChars() { + return maxMethodLengthInChars; + } + /** * Increases the indentation level. */ @@ -196,4 +220,12 @@ public void backUp() { indentPending = false; } } + + ExpressionWriter duplicateState() { + final ExpressionWriter writer = + new ExpressionWriter(this.generics, this.maxMethodLengthInChars); + writer.indentPending = this.indentPending; + writer.spacer.add(this.spacer.get()); + return writer; + } } diff --git a/linq4j/src/main/java/org/apache/calcite/linq4j/tree/Expressions.java b/linq4j/src/main/java/org/apache/calcite/linq4j/tree/Expressions.java index ebebd2ca6921..6baf5b8930b5 100644 --- a/linq4j/src/main/java/org/apache/calcite/linq4j/tree/Expressions.java +++ b/linq4j/src/main/java/org/apache/calcite/linq4j/tree/Expressions.java @@ -51,6 +51,11 @@ * Utility methods for expressions, including a lot of factory methods. */ public abstract class Expressions { + /** + * Used to disable method splitting when generating Java code with {@link #toString(Node, int)}. + */ + public static final int DISABLE_METHOD_SPLITTING = -1; + private Expressions() {} /** @@ -59,7 +64,19 @@ private Expressions() {} */ public static String toString(List expressions, String sep, boolean generics) { - final ExpressionWriter writer = new ExpressionWriter(generics); + return toString(expressions, sep, generics, Expressions.DISABLE_METHOD_SPLITTING); + } + + /** + * Converts a list of expressions to Java source code, optionally emitting + * extra type information in generics. Optionally splits the generated method if + * the method exceeds the length specified in maxMethodLengthInChars. + * A maxMethodLengthInChars value of {@link #DISABLE_METHOD_SPLITTING} + * disables method splitting. + */ + public static String toString(List expressions, String sep, + boolean generics, int maxMethodLengthInChars) { + final ExpressionWriter writer = new ExpressionWriter(generics, maxMethodLengthInChars); for (Node expression : expressions) { writer.write(expression); writer.append(sep); @@ -71,7 +88,17 @@ public static String toString(List expressions, String sep, * Converts an expression to Java source code. */ public static String toString(Node expression) { - return toString(Collections.singletonList(expression), "", true); + return toString(expression, Expressions.DISABLE_METHOD_SPLITTING); + } + + /** + * Converts an expression to Java source code. + * Optionally splits the generated method if the method exceeds the length specified in + * maxMethodLengthInChars. A maxMethodLengthInChars value of {@link #DISABLE_METHOD_SPLITTING} + * disables method splitting. + */ + public static String toString(Node expression, int maxMethodLengthInChars) { + return toString(Collections.singletonList(expression), "", true, maxMethodLengthInChars); } /** diff --git a/linq4j/src/test/java/org/apache/calcite/linq4j/test/ExpressionTest.java b/linq4j/src/test/java/org/apache/calcite/linq4j/test/ExpressionTest.java index 52577617583e..0ef7b00e03db 100644 --- a/linq4j/src/test/java/org/apache/calcite/linq4j/test/ExpressionTest.java +++ b/linq4j/src/test/java/org/apache/calcite/linq4j/test/ExpressionTest.java @@ -21,18 +21,22 @@ import org.apache.calcite.linq4j.tree.BlockStatement; import org.apache.calcite.linq4j.tree.Blocks; import org.apache.calcite.linq4j.tree.ClassDeclaration; +import org.apache.calcite.linq4j.tree.ConditionalStatement; import org.apache.calcite.linq4j.tree.DeclarationStatement; import org.apache.calcite.linq4j.tree.Expression; import org.apache.calcite.linq4j.tree.Expressions; import org.apache.calcite.linq4j.tree.FieldDeclaration; import org.apache.calcite.linq4j.tree.FunctionExpression; import org.apache.calcite.linq4j.tree.MethodCallExpression; +import org.apache.calcite.linq4j.tree.MethodDeclaration; import org.apache.calcite.linq4j.tree.NewExpression; import org.apache.calcite.linq4j.tree.Node; import org.apache.calcite.linq4j.tree.ParameterExpression; import org.apache.calcite.linq4j.tree.Shuttle; import org.apache.calcite.linq4j.tree.Types; +import org.apache.commons.lang3.StringUtils; + import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; @@ -69,6 +73,7 @@ import static org.hamcrest.CoreMatchers.notNullValue; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.hasToString; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; /** @@ -1719,6 +1724,187 @@ public void checkBlockBuilder(boolean optimizing, String expected) { Expressions.toString(Expressions.constant(set))); } + /** Test case for [CALCITE-6465] + * Rework code generation to use Flink code splitter. */ + @Test void testLargeMethodWithMethodSplitting() { + // The set of Expression objects used in this test will result in initial Java code + // of the form: + // public class MyClass { + // public void hugeMethod() { + // if (true) { + // // Long variable name declaration 1; + // } else if (false) { + // // Long variable name declaration 2; + // } else if (false) { + // // Long variable name declaration 3; + // } else if (false) { + // // Long variable name declaration 4; + // } else + // // Long variable name declaration 4; + // } + // } + + // This test will use Flink's method splitting to extract long local variable names in the + // "hugeMethod" method to fields of MyClass. This will in turn reduce the length of the + // method (but can increase the length of the class itself). + + // Generate long variable names. + final String longName = StringUtils.repeat('a', 5000); + + final DeclarationStatement longDecl = + Expressions.declare(0, longName, Expressions.constant(10)); + final ConditionalStatement ifThenElse = + Expressions.ifThenElse( + Expressions.constant(true), + longDecl, + Expressions.constant(false), + longDecl, + Expressions.constant(false), + longDecl, + longDecl); + + final MethodDeclaration hugeMethod = + Expressions.methodDecl(Modifier.PUBLIC, + Void.TYPE, + "hugeMethod", + Collections.emptyList(), + Blocks.toFunctionBlock(ifThenElse)); + + final ClassDeclaration classDecl = + new ClassDeclaration(Modifier.PUBLIC, + "MyClass", + null, + ImmutableList.of(), + ImmutableList.of(hugeMethod)); + + final String originalClass = Expressions.toString(classDecl); + final String classWithMethodSplitting = Expressions.toString(classDecl, 4000); + + // Validate that the Flink method splitter ran. + // Test that some local variables in hugeMethod have been extracted to be fields + // named local$ in the enclosing class definition. + assertThat("The non-split method should not generate a field containing the prefix" + + "local$", !originalClass.contains("local$")); + assertThat("The split method should rewrite the method to create a field with a name" + + " prefixed local$", classWithMethodSplitting.contains("local$")); + } + + /** Test case for [CALCITE-6465] + * Rework code generation to use Flink code splitter. */ + @Test void testMethodThatDoesNotNeedToBeSplit() { + // The set of Expression objects used in this test will result in initial Java code + // of the form: + // public class MyClass { + // public void testMethod() { + // if (true) { + // int dummyVar = 0; + // } else if (false) { + // int dummyVar = 0; + // } else if (false) { + // int dummyVar = 0; + // } else if (false) { + // int dummyVar = 0; + // } else + // int dummyVar = 0; + // } + // } + + // This test verifies that the Flink method splitter does not get invoked if the method + // is already short enough to fit within the size requested. + + // Generate long variable names. + final String localVariable = "dummyVar"; + + final DeclarationStatement localDecl = + Expressions.declare(0, localVariable, Expressions.constant(10)); + final ConditionalStatement ifThenElse = + Expressions.ifThenElse( + Expressions.constant(true), + localDecl, + Expressions.constant(false), + localDecl, + Expressions.constant(false), + localDecl, + localDecl); + + final MethodDeclaration hugeMethod = + Expressions.methodDecl(Modifier.PUBLIC, + Void.TYPE, + "testMethod", + Collections.emptyList(), + Blocks.toFunctionBlock(ifThenElse)); + + final ClassDeclaration classDecl = + new ClassDeclaration(Modifier.PUBLIC, + "MyClass", + null, + ImmutableList.of(), + ImmutableList.of(hugeMethod)); + + final String originalClass = Expressions.toString(classDecl); + final String classWithMethodSplitting = Expressions.toString(classDecl, 1000); + + // Validate that the results with and without method splitting are the same. + assertThat("The generated code with and without method splitting should be the same", + originalClass.equals(classWithMethodSplitting)); + } + + /** Test case for [CALCITE-6465] + * Rework code generation to use Flink code splitter. */ + @Test void testMethodThatCannotFitWithinSizeLimit() { + // The set of Expression objects used in this test will result in initial Java code + // of the form: + // public class MyClass { + // public void testMethod() { + // if (true) { + // int dummyVar = 0; + // } else if (false) { + // int dummyVar = 0; + // } else if (false) { + // int dummyVar = 0; + // } else if (false) { + // int dummyVar = 0; + // } else + // int dummyVar = 0; + // } + // } + + // This test will attempt to use Flink's method splitting but fail because the method cannot + // be reduced to the number of characters requested. It should fail gracefully in this + // scenario. + + // Generate long variable names. + final String localVariable = "dummyVar"; + + final DeclarationStatement localDecl = + Expressions.declare(0, localVariable, Expressions.constant(10)); + final ConditionalStatement ifThenElse = + Expressions.ifThenElse( + Expressions.constant(true), + localDecl, + Expressions.constant(false), + localDecl, + Expressions.constant(false), + localDecl, + localDecl); + + final MethodDeclaration hugeMethod = + Expressions.methodDecl(Modifier.PUBLIC, + Void.TYPE, + "testMethod", + Collections.emptyList(), + Blocks.toFunctionBlock(ifThenElse)); + + final ClassDeclaration classDecl = + new ClassDeclaration(Modifier.PUBLIC, + "MyClass", + null, + ImmutableList.of(), + ImmutableList.of(hugeMethod)); + + assertDoesNotThrow(() -> Expressions.toString(classDecl, 5)); + } + /** An enum. */ enum MyEnum { X, diff --git a/ubenchmark/src/jmh/java/org/apache/calcite/adapter/enumerable/CodeGenerationBenchmark.java b/ubenchmark/src/jmh/java/org/apache/calcite/adapter/enumerable/CodeGenerationBenchmark.java index 22c745dd8f86..397a7a3afa61 100644 --- a/ubenchmark/src/jmh/java/org/apache/calcite/adapter/enumerable/CodeGenerationBenchmark.java +++ b/ubenchmark/src/jmh/java/org/apache/calcite/adapter/enumerable/CodeGenerationBenchmark.java @@ -184,8 +184,11 @@ public void setup() { EnumerableRelImplementor relImplementor = new EnumerableRelImplementor(plan.getCluster().getRexBuilder(), new HashMap<>()); info.classExpr = relImplementor.implementRoot(plan, EnumerableRel.Prefer.ARRAY); + + // Set the method splitting threshold to the JVM limit of 4K. info.javaCode = - Expressions.toString(info.classExpr.memberDeclarations, "\n", false); + Expressions.toString(info.classExpr.memberDeclarations, + "\n", false, 4000); info.plan = plan; planInfos[i] = info; }