
Hadoop JobConf class is deprecated, need updated example


I'm writing Hadoop programs, and I really don't want to play with deprecated classes. Nowhere online can I find an updated example of a program using the

org.apache.hadoop.conf.Configuration

class instead of the

org.apache.hadoop.mapred.JobConf

class.

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(Test.class);
    conf.setJobName("TESST");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}

That's what my main() looks like. Can anyone please provide me with an updated version?

Best Answer

Here's the classic WordCount example. You'll notice a bunch of other imports that may not be necessary; reading the code you'll figure out which is which.

What's different? I'm using the Tool interface and GenericOptionsParser to parse the job command line, a.k.a.: hadoop jar ....
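For example (the jar name and paths below are placeholders, and mapreduce.job.reduces is the Hadoop 2 name of the property):

hadoop jar wordcount.jar Inception -D mapreduce.job.reduces=2 /user/me/input /user/me/output

GenericOptionsParser consumes generic options such as -D, so getRemainingArgs() hands back only the two paths, which become args[0] and args[1] in run().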

In the mapper you'll notice a run method. You can get rid of it, since an implementation that does exactly the same thing is invoked by default when you only supply the map method. I put it there to show you that you can further control the map phase. This all uses the new API. I hope it helps. Any other questions, let me know!

import java.io.IOException;
import java.util.*;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.GenericOptionsParser;

public class Inception extends Configured implements Tool {

    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }

        // Same as the default Mapper.run(); overriding it gives you control
        // over the whole map phase (setup, the record loop, and cleanup).
        @Override
        public void run(Context context) throws IOException, InterruptedException {
            setup(context);
            while (context.nextKeyValue()) {
                map(context.getCurrentKey(), context.getCurrentValue(), context);
            }
            cleanup(context);
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public int run(String[] args) throws Exception {

        // Use the Configuration injected by ToolRunner, which already
        // carries any generic options passed on the command line.
        Job job = Job.getInstance(getConf());

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setJarByClass(Inception.class);

        // Block until the job finishes, like the old JobClient.runJob(conf) did.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        System.exit(ToolRunner.run(conf, new Inception(), otherArgs));
    }
}
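One thing from the question's driver that the example above leaves out is the combiner. With the new API it is a single extra line on the Job; a minimal sketch, reusing Reduce (safe here because summing is associative and the reducer's input and output types match):

        // Optional: pre-aggregate counts on the map side before the shuffle.
        job.setCombinerClass(Reduce.class);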

Regarding "Hadoop JobConf class is deprecated, need updated example", we found a similar question on Stack Overflow: https://stackoverflow.com/questions/8603788/
