-
Notifications
You must be signed in to change notification settings - Fork 22
GpuXGBoostSpark error when run GPU Mortgage example #34
Comments
@zhouyanxin282446 Can you provide the parameters you used to submit the application? |
I use the same parameters as in the Launch GPU Mortgage Example: |
@zhouyanxin282446 Since you were testing the sample in Standalone mode, you might need to add the following conf:
|
hi,
I hit an error when running the GPU Mortgage example on a Spark Standalone cluster, as a Python application with cudf10.2.
Below is the error log:
21/04/22 02:11:38 ERROR GpuXGBoostSpark: The job was aborted due to
java.lang.reflect.InvocationTargetException
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at ml.dmlc.xgboost4j.scala.spark.rapids.GpuUtils$.toColumnarRdd(GpuUtils.scala:39)
at ml.dmlc.xgboost4j.scala.spark.rapids.GpuXGBoost$.trainOnGpuInternal(GpuXGBoost.scala:240)
at ml.dmlc.xgboost4j.scala.spark.rapids.GpuXGBoost$.trainDistributedOnGpu(GpuXGBoost.scala:186)
at ml.dmlc.xgboost4j.scala.spark.rapids.GpuXGBoost$.trainOnGpu(GpuXGBoost.scala:91)
at ml.dmlc.xgboost4j.scala.spark.rapids.GpuXGBoost$.fitOnGpu(GpuXGBoost.scala:52)
at ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier.fit(XGBoostClassifier.scala:170)
at ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier.fit(XGBoostClassifier.scala:41)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: java.lang.NoClassDefFoundError: ai/rapids/cudf/ColumnView
at com.nvidia.spark.rapids.CastExprMeta.convertToGpu(GpuCast.scala:88)
at com.nvidia.spark.rapids.UnaryExprMeta.convertToGpu(RapidsMeta.scala:755)
at com.nvidia.spark.rapids.UnaryExprMeta.convertToGpu(RapidsMeta.scala:747)
at com.nvidia.spark.rapids.UnaryExprMeta.convertToGpu(RapidsMeta.scala:755)
at com.nvidia.spark.rapids.UnaryExprMeta.convertToGpu(RapidsMeta.scala:747)
at com.nvidia.spark.rapids.GpuOverrides$$anon$147.$anonfun$convertToGpu$21(GpuOverrides.scala:2490)
at scala.collection.immutable.Stream.$anonfun$map$1(Stream.scala:418)
at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1171)
at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1161)
at scala.collection.immutable.Stream.$anonfun$map$1(Stream.scala:418)
at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1171)
at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1161)
at scala.collection.immutable.Stream.force(Stream.scala:274)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:432)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:243)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:403)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:356)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:336)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformUp$1(TreeNode.scala:336)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:405)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:243)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:403)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:356)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:336)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformUp$1(TreeNode.scala:336)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:405)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:243)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:403)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:356)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:336)
at com.nvidia.spark.rapids.GpuOverrides.addSortsIfNeeded(GpuOverrides.scala:2854)
at com.nvidia.spark.rapids.GpuOverrides.applyOverrides(GpuOverrides.scala:2814)
at com.nvidia.spark.rapids.GpuOverrides.apply(GpuOverrides.scala:2787)
at com.nvidia.spark.rapids.GpuOverrides.apply(GpuOverrides.scala:2776)
at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.$anonfun$apply$1(Columnar.scala:514)
at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.$anonfun$apply$1$adapted(Columnar.scala:513)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.apply(Columnar.scala:513)
at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.apply(Columnar.scala:482)
at org.apache.spark.sql.execution.QueryExecution$.$anonfun$prepareForExecution$1(QueryExecution.scala:324)
at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
at scala.collection.immutable.List.foldLeft(List.scala:89)
at org.apache.spark.sql.execution.QueryExecution$.prepareForExecution(QueryExecution.scala:324)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executedPlan$1(QueryExecution.scala:112)
at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:138)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:138)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:112)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:105)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:126)
at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:3200)
at org.apache.spark.sql.Dataset.rdd(Dataset.scala:3198)
at org.apache.spark.sql.rapids.execution.InternalColumnarRddConverter$.convert(InternalColumnarRddConverter.scala:485)
at com.nvidia.spark.rapids.ColumnarRdd$.convert(ColumnarRdd.scala:47)
at com.nvidia.spark.rapids.ColumnarRdd.convert(ColumnarRdd.scala)
... 22 more
Caused by: java.lang.ClassNotFoundException: ai.rapids.cudf.ColumnView
at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(BuiltinClassLoader.java:583)
at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(ClassLoaders.java:178)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:521)
... 82 more
21/04/22 02:11:38 INFO RabitTracker$TrackerProcessLogger: Tracker Process ends with exit code 143
21/04/22 02:11:38 INFO SparkUI: Stopped Spark web UI at http://7e7a98e233be:4040
21/04/22 02:11:38 INFO StandaloneSchedulerBackend: Shutting down all executors
21/04/22 02:11:38 INFO CoarseGrainedSchedulerBackend$DriverEndpoint: Asking each executor to shut down
21/04/22 02:11:38 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
21/04/22 02:11:38 INFO MemoryStore: MemoryStore cleared
21/04/22 02:11:38 INFO BlockManager: BlockManager stopped
21/04/22 02:11:38 INFO BlockManagerMaster: BlockManagerMaster stopped
21/04/22 02:11:38 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
21/04/22 02:11:38 INFO SparkContext: Successfully stopped SparkContext
Traceback (most recent call last):
File "/opt/xgboost/main.py", line 18, in <module>
main()
File "/opt/xgboost/samples.zip/com/nvidia/spark/examples/main.py", line 21, in main
File "/opt/xgboost/samples.zip/com/nvidia/spark/examples/mortgage/gpu_main.py", line 41, in main
File "/opt/xgboost/samples.zip/com/nvidia/spark/examples/utility/utils.py", line 46, in with_benchmark
File "/opt/xgboost/samples.zip/com/nvidia/spark/examples/mortgage/gpu_main.py", line 41, in <lambda>
File "/opt/spark/python/lib/pyspark.zip/pyspark/ml/base.py", line 129, in fit
File "/opt/spark/python/lib/pyspark.zip/pyspark/ml/wrapper.py", line 321, in _fit
File "/opt/spark/python/lib/pyspark.zip/pyspark/ml/wrapper.py", line 318, in _fit_java
File "/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__
File "/opt/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 128, in deco
File "/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 328, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o57.fit.
: java.lang.NoClassDefFoundError: ai/rapids/cudf/ColumnView
at com.nvidia.spark.rapids.CastExprMeta.convertToGpu(GpuCast.scala:88)
at com.nvidia.spark.rapids.UnaryExprMeta.convertToGpu(RapidsMeta.scala:755)
at com.nvidia.spark.rapids.UnaryExprMeta.convertToGpu(RapidsMeta.scala:747)
at com.nvidia.spark.rapids.UnaryExprMeta.convertToGpu(RapidsMeta.scala:755)
at com.nvidia.spark.rapids.UnaryExprMeta.convertToGpu(RapidsMeta.scala:747)
at com.nvidia.spark.rapids.GpuOverrides$$anon$147.$anonfun$convertToGpu$21(GpuOverrides.scala:2490)
at scala.collection.immutable.Stream.$anonfun$map$1(Stream.scala:418)
at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1171)
at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1161)
at scala.collection.immutable.Stream.$anonfun$map$1(Stream.scala:418)
at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1171)
at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1161)
at scala.collection.immutable.Stream.force(Stream.scala:274)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:432)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:243)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:403)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:356)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:336)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformUp$1(TreeNode.scala:336)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:405)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:243)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:403)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:356)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:336)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformUp$1(TreeNode.scala:336)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:405)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:243)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:403)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:356)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:336)
at com.nvidia.spark.rapids.GpuOverrides.addSortsIfNeeded(GpuOverrides.scala:2854)
at com.nvidia.spark.rapids.GpuOverrides.applyOverrides(GpuOverrides.scala:2814)
at com.nvidia.spark.rapids.GpuOverrides.apply(GpuOverrides.scala:2787)
at com.nvidia.spark.rapids.GpuOverrides.apply(GpuOverrides.scala:2776)
at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.$anonfun$apply$1(Columnar.scala:514)
at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.$anonfun$apply$1$adapted(Columnar.scala:513)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.apply(Columnar.scala:513)
at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.apply(Columnar.scala:482)
at org.apache.spark.sql.execution.QueryExecution$.$anonfun$prepareForExecution$1(QueryExecution.scala:324)
at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
at scala.collection.immutable.List.foldLeft(List.scala:89)
at org.apache.spark.sql.execution.QueryExecution$.prepareForExecution(QueryExecution.scala:324)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executedPlan$1(QueryExecution.scala:112)
at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:138)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:138)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:112)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:105)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:126)
at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:3200)
at org.apache.spark.sql.Dataset.rdd(Dataset.scala:3198)
at org.apache.spark.sql.rapids.execution.InternalColumnarRddConverter$.convert(InternalColumnarRddConverter.scala:485)
at com.nvidia.spark.rapids.ColumnarRdd$.convert(ColumnarRdd.scala:47)
at com.nvidia.spark.rapids.ColumnarRdd.convert(ColumnarRdd.scala)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at ml.dmlc.xgboost4j.scala.spark.rapids.GpuUtils$.toColumnarRdd(GpuUtils.scala:39)
at ml.dmlc.xgboost4j.scala.spark.rapids.GpuXGBoost$.trainOnGpuInternal(GpuXGBoost.scala:240)
at ml.dmlc.xgboost4j.scala.spark.rapids.GpuXGBoost$.trainDistributedOnGpu(GpuXGBoost.scala:186)
at ml.dmlc.xgboost4j.scala.spark.rapids.GpuXGBoost$.trainOnGpu(GpuXGBoost.scala:91)
at ml.dmlc.xgboost4j.scala.spark.rapids.GpuXGBoost$.fitOnGpu(GpuXGBoost.scala:52)
at ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier.fit(XGBoostClassifier.scala:170)
at ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier.fit(XGBoostClassifier.scala:41)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: java.lang.ClassNotFoundException: ai.rapids.cudf.ColumnView
at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(BuiltinClassLoader.java:583)
at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(ClassLoaders.java:178)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:521)
... 82 more
The text was updated successfully, but these errors were encountered: