
hadoop - Simple Map-Reduce code fails with a NullPointerException


I am trying to run this simple map-reduce code, which counts the occurrences of each word in a text file (the code was provided in class):

    import java.io.IOException;
    import java.util.StringTokenizer;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Partitioner;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class WordCount {

        // Mapper: emits (word, 1) for every token in the input line.
        public static class MapClass extends Mapper<LongWritable, Text, Text, IntWritable> {
            private final static IntWritable one = new IntWritable(1);
            private Text word = new Text();

            @Override
            public void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                StringTokenizer itr = new StringTokenizer(value.toString());
                while (itr.hasMoreTokens()) {
                    word.set(itr.nextToken());
                    context.write(word, one);
                }
            }
        }

        // Reducer (also used as combiner): sums the counts for each word.
        public static class ReduceClass extends Reducer<Text, IntWritable, Text, IntWritable> {
            @Override
            public void reduce(Text key, Iterable<IntWritable> values, Context context)
                    throws IOException, InterruptedException {
                int sum = 0;
                for (IntWritable value : values) {
                    sum += value.get();
                }
                context.write(key, new IntWritable(sum));
            }
        }

        // Partitioner: words starting with a Hebrew letter (U+05D0..U+05EA) go to
        // partition 1, everything else to partition 0.
        public static class PartitionerClass extends Partitioner<Text, IntWritable> {
            @Override
            public int getPartition(Text key, IntWritable value, int numPartitions) {
                return getLanguage(key) % numPartitions;
            }

            private int getLanguage(Text key) {
                if (key.getLength() > 0) {
                    int c = key.charAt(0);
                    if (c >= Long.decode("0x05D0").longValue() && c <= Long.decode("0x05EA").longValue())
                        return 1;
                }
                return 0;
            }
        }

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            //conf.set("mapred.map.tasks","10");
            //conf.set("mapred.reduce.tasks","2");
            Job job = new Job(conf, "word count");
            job.setJarByClass(WordCount.class);
            job.setMapperClass(MapClass.class);
            job.setPartitionerClass(PartitionerClass.class);
            job.setCombinerClass(ReduceClass.class);
            job.setReducerClass(ReduceClass.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }

I am getting this strange NullPointerException and cannot figure out where it comes from. I have included hadoop-common, hadoop-mapreduce-client-core, and hadoop-hdfs in my pom.xml dependencies.
log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
Exception in thread "main" java.lang.NullPointerException
at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
at org.apache.hadoop.util.Shell.runCommand(Shell.java:404)
at org.apache.hadoop.util.Shell.run(Shell.java:379)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:589)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:678)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:661)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:639)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:435)
at org.apache.hadoop.fs.FilterFileSystem.mkdirs(FilterFileSystem.java:277)
at org.apache.hadoop.mapreduce.JobSubmissionFiles.getStagingDir(JobSubmissionFiles.java:125)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:344)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1268)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1265)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1265)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1286)
at WordCount.main(WordCount.java:74)

Best Answer

Looking at your code, the only problem I can see is that you never set the number of reduce tasks, while your partitioner expects two. You end up with the default of one.

You can try setting it in your driver with:

    job.setNumReduceTasks(2);
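
For instance, the call can sit alongside the other job setup calls in main(), right after the partitioner is registered (a minimal sketch reusing the class names from the code above):

    job.setPartitionerClass(PartitionerClass.class);
    // Two reduce tasks, one per partition value returned by PartitionerClass.getPartition().
    job.setNumReduceTasks(2);

With only the default single reducer, getLanguage(key) % numPartitions collapses to 0 for every key, so everything lands in partition 0 no matter what the partitioner computes.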

Regarding "hadoop - Simple Map-Reduce code fails with a NullPointerException", we found a similar question on Stack Overflow: https://stackoverflow.com/questions/42422960/
