From 1f6bd7ab55db3e5b08ff133d554b32db83a23890 Mon Sep 17 00:00:00 2001 From: Moritz Meister <8422705+moritzmeister@users.noreply.github.com> Date: Tue, 12 Nov 2024 14:33:33 +0100 Subject: [PATCH] [LIVY-863] Missing JVM class imports for Spark3 ## What changes were proposed in this pull request? Description of the problem: https://issues.apache.org/jira/browse/LIVY-863 The proposed fix adds the missing imports that upstream Spark performs when it initializes the Java gateway. See Spark's imports here: https://github.com/apache/spark/blob/87bf6b0ea4ca0618c8604895d05037edce8b7cb0/python/pyspark/java_gateway.py#L153 As far as I am aware, java_import() does not fail (or fails silently) if the imported class does not exist. However, I might need to add some code to account for different Spark versions; I am looking for some guidance on this. ## How was this patch tested? Reviewers tested with both valid and "bogus" imports. --- repl/src/main/resources/fake_shell.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/repl/src/main/resources/fake_shell.py b/repl/src/main/resources/fake_shell.py index b5b284d8f..5472f533e 100644 --- a/repl/src/main/resources/fake_shell.py +++ b/repl/src/main/resources/fake_shell.py @@ -589,8 +589,11 @@ def main(): java_import(gateway.jvm, "org.apache.spark.SparkConf") java_import(gateway.jvm, "org.apache.spark.api.java.*") java_import(gateway.jvm, "org.apache.spark.api.python.*") + java_import(gateway.jvm, "org.apache.spark.ml.python.*") java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*") + java_import(gateway.jvm, "org.apache.spark.resource.*") java_import(gateway.jvm, "org.apache.spark.sql.*") + java_import(gateway.jvm, "org.apache.spark.sql.api.python.*") java_import(gateway.jvm, "org.apache.spark.sql.hive.*") java_import(gateway.jvm, "scala.Tuple2")