- html - 出于某种原因,IE8 对我的 Sass 文件中继承的 html5 CSS 不友好?
- JMeter 在响应断言中使用 span 标签的问题
- html - 在 :hover and :active? 上具有不同效果的 CSS 动画
- html - 相对于居中的 html 内容固定的 CSS 重复背景?
如果我增加 word2vec 模型的模型大小,我开始在 log 中遇到这种异常:
org.apache.spark.shuffle.MetadataFetchFailedException: Missing an output location for shuffle 6
at org.apache.spark.MapOutputTracker$$anonfun$org$apache$spark$MapOutputTracker$$convertMapStatuses$2.apply(MapOutputTracker.scala:542)
at org.apache.spark.MapOutputTracker$$anonfun$org$apache$spark$MapOutputTracker$$convertMapStatuses$2.apply(MapOutputTracker.scala:538)
at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
at org.apache.spark.MapOutputTracker$.org$apache$spark$MapOutputTracker$$convertMapStatuses(MapOutputTracker.scala:538)
at org.apache.spark.MapOutputTracker.getMapSizesByExecutorId(MapOutputTracker.scala:155)
at org.apache.spark.shuffle.BlockStoreShuffleReader.read(BlockStoreShuffleReader.scala:47)
at org.apache.spark.rdd.ShuffledRDD.compute(ShuffledRDD.scala:98)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.rdd.CoalescedRDD$$anonfun$compute$1.apply(CoalescedRDD.scala:96)
at org.apache.spark.rdd.CoalescedRDD$$anonfun$compute$1.apply(CoalescedRDD.scala:95)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
at scala.collection.AbstractIterator.to(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$12.apply(RDD.scala:927)
at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$12.apply(RDD.scala:927)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
def save(model: Word2VecModel, sc: SparkContext, path: String): Unit = {
println("Saving model as CSV ..")
val vectorSize = model.getVectors.values.head.size
println("vectorSize="+vectorSize)
val SEPARATOR_TOKEN = " "
val dataArray = model.getVectors.toSeq.map { case (w, v) => Data(w, v) }
println("Got dataArray ..")
println("parallelize(dataArray, 10)")
val par = sc.parallelize(dataArray, 10)
.map(d => {
val sb = new mutable.StringBuilder()
sb.append(d.word)
sb.append(SEPARATOR_TOKEN)
for(v <- d.vector) {
sb.append(v)
sb.append(SEPARATOR_TOKEN)
}
sb.setLength(sb.length - 1)
sb.append("\n")
sb.toString()
})
println("repartition(1)")
val rep = par.repartition(1)
println("collect()")
val vectorsAsString = rep.collect()
println("Collected serialized vectors ..")
val cfile = new mutable.StringBuilder()
cfile.append(vectorsAsString.length)
cfile.append(" ")
cfile.append(vectorSize)
cfile.append("\n")
val sb = new StringBuilder
sb.append("word,")
for(i <- 0 until vectorSize) {
sb.append("v")
sb.append(i.toString)
sb.append(",")
}
sb.setLength(sb.length - 1)
sb.append("\n")
for(vectorString <- vectorsAsString) {
sb.append(vectorString)
cfile.append(vectorString)
}
println("Saving file to " + new Path(path, "data").toUri.toString)
sc.parallelize(sb.toString().split("\n"), 1).saveAsTextFile(new Path(path+".csv", "data").toUri.toString)
sc.parallelize(cfile.toString().split("\n"), 1).saveAsTextFile(new Path(path+".cs", "data").toUri.toString)
}
最佳答案
MetadataFetchFailedException
当 MapOutputTracker
时抛出在执行程序上找不到本地缓存中分区的请求的随机映射输出,并尝试从驱动程序的 MapOutputTracker
远程获取它们.
这可能会导致几个结论:
关于scala - 如何修复 "MetadataFetchFailedException: Missing an output location for shuffle"?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/36815506/
当我使用 1 GB 数据集运行解析代码时,它完成且没有任何错误。但是,当我一次尝试 25 GB 数据时,出现以下错误。我试图了解如何避免以下故障。很高兴听到任何建议或想法。 不同的错误, org.ap
当我使用 1 GB 数据集运行解析代码时,它完成时没有任何错误。但是,当我一次尝试 25 GB 的数据时,出现以下错误。我试图了解如何避免以下失败。很高兴听到任何建议或想法。 不同的错误, org.a
如果我增加 word2vec 模型的模型大小,我开始在 log 中遇到这种异常: org.apache.spark.shuffle.MetadataFetchFailedException: Miss
我正在 EC2 集群上部署 Spark 数据处理作业,该作业对于集群来说很小(16 个核心,总共 120G RAM),最大的 RDD 只有 76k+ 行。但是中间严重倾斜(因此需要重新分区)并且每
我正在以推测模式运行 Spark 作业。我有大约 500 个任务和大约 500 个 1 GB gz 压缩文件。我不断地进入每项作业,对于 1-2 个任务,附加错误,然后它会重新运行数十次(阻止作业完成
我是一名优秀的程序员,十分优秀!