现象
使用spark-submit提交一个Spark Streaming Application至yarn集群, 报错
Caused by: java.lang.ClassNotFoundException: XXXStartup$$anonfun$9 at java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:355) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:423) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at java.lang.ClassLoader.loadClass(ClassLoader.java:356) at java.lang.Class.forName0(Native Method) at java.lang.Class.forName(Class.java:266) at java.io.ObjectInputStream.resolveClass(ObjectInputStream.java:623) at org.apache.spark.streaming.ObjectInputStreamWithLoader.resolveClass(Checkpoint.scala:286) at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1610) at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1515) at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1769) at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1348) at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1989) at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1913) at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1796) at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1348) at java.io.ObjectInputStream.readArray(ObjectInputStream.java:1704) at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1342) at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1989) at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1913) at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1796) at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1348) at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1989) at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1913) at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1796) at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1348) at java.io.ObjectInputStream.readArray(ObjectInputStream.java:1704) at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1342) at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1989) at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1913) at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1796) at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1348) at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1989) at java.io.ObjectInputStream.defaultReadObject(ObjectInputStream.java:499) at org.apache.spark.streaming.DStreamGraph$$anonfun$readObject$1.apply$mcV$sp(DStreamGraph.scala:188) at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1138) ... 31 more Exception in thread "main" org.apache.spark.SparkException: Failed to read checkpoint from directory XXX_startup at org.apache.spark.streaming.CheckpointReader$.read(Checkpoint.scala:272) at org.apache.spark.streaming.StreamingContext$.getOrCreate(StreamingContext.scala:624) at XXXStartup$.main(XXXStartup.scala:79) at XXXStartup.main(XXXStartup.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:569) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:166) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:189) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:110) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
解决方案及原因
StreamingContext是这样创建的:
val createStreamingContext = (checkPointDir: String) => { val sparkConf = new SparkConf().setAppName(topic) val sparkContext = new SparkContext(sparkConf) @transient val streamingContext = new StreamingContext(sparkContext, Seconds(args(1).toInt)) streamingContext.checkpoint(checkPointDir) streamingContext } val checkPointDir = AppConf.strCheckPointPrefix + topic val streamingContext = StreamingContext.getOrCreate(checkPointDir, () => createStreamingContext(checkPointDir))
重新编译application jar包之后, 再次提交app之前没有清除checkpoint目录下已经存在的之前的application生成的checkpoint文件导致. 清除之后再提交即可
作者:牛肉圆粉不加葱
链接:https://www.jianshu.com/p/807b0767953a