
hadoop - MultipleOutputs in Hadoop with the newer API


I wrote a simple word count program and am trying to write its output with MultipleOutputs in the newer API format, so that the output files are named after the keys:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class word {

    public static class wordmapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        Text x = new Text();
        IntWritable z = new IntWritable(1);
        String str1 = null;

        // Emit (token, 1) for every token in the input line.
        public void map(LongWritable key, Text value, Context con)
                throws IOException, InterruptedException {
            StringTokenizer str = new StringTokenizer(value.toString());
            while (str.hasMoreTokens()) {
                str1 = str.nextToken();
                x.set(str1);
                con.write(x, z);
            }
        }
    }

    public static class wordreducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        String generateFilename(Text k) { return k.toString(); }

        private MultipleOutputs mos;

        public void setup(Context context) {
            mos = new MultipleOutputs(context);
        }

        String m = null;
        IntWritable num = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> value, Context con)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable k : value) {
                sum = sum + k.get();
            }
            num.set(sum);
            m = generateFilename(key);
            //mos.write(key, num, "text");
            //mos.write("text", key, num);
            mos.write("text", key, value, generateFilename(key));
            //con.write(key, num);
        }
    }

    public static void main(String args[])
            throws IOException, InterruptedException, ClassNotFoundException {

        Configuration con = new Configuration();
        Job j = new Job(con, "word");
        j.setJarByClass(word.class);

        j.setMapperClass(wordmapper.class);
        j.setReducerClass(wordreducer.class);
        //j.setNumReduceTasks(0);

        j.setOutputKeyClass(Text.class);
        j.setOutputValueClass(IntWritable.class);

        MultipleOutputs.addNamedOutput(j, "text", TextOutputFormat.class, Text.class, IntWritable.class);

        //MultipleOutputs.addNamedOutput(j, namedOutput, outputFormatClass, keyClass, valueClass);

        FileInputFormat.addInputPath(j, new Path(args[0]));
        FileOutputFormat.setOutputPath(j, new Path(args[1]));

        System.exit(j.waitForCompletion(true) ? 0 : 1);
    }
}

The job produces the output files, but they contain no data and will not open:
-rw-r--r--   1 root supergroup   0 2014-09-15 03:00 /user/root/listouput/world9/_SUCCESS
drwxr-xr-x   - root supergroup   0 2014-09-15 03:00 /user/root/listouput/world9/_logs
-rw-r--r--   1 root supergroup   0 2014-09-15 03:00 /user/root/listouput/world9/best-r-00000
-rw-r--r--   1 root supergroup   0 2014-09-15 03:00 /user/root/listouput/world9/good-r-00000
-rw-r--r--   1 root supergroup   0 2014-09-15 03:00 /user/root/listouput/world9/hadoop-r-00000
-rw-r--r--   1 root supergroup   0 2014-09-15 03:00 /user/root/listouput/world9/hello-r-00000
-rw-r--r--   1 root supergroup   0 2014-09-15 03:00 /user/root/listouput/world9/is-r-00000
-rw-r--r--   1 root supergroup   0 2014-09-15 03:00 /user/root/listouput/world9/part-r-00000
-rw-r--r--   1 root supergroup   0 2014-09-15 03:00 /user/root/listouput/world9/rule-r-00000
-rw-r--r--   1 root supergroup   0 2014-09-15 03:00 /user/root/listouput/world9/the-r-00000
-rw-r--r--   1 root supergroup   0 2014-09-15 03:00 /user/root/listouput/world9/we-r-00000
-rw-r--r--   1 root supergroup   0 2014-09-15 03:00 /user/root/listouput/world9/world-r-00000

root@ubuntu:/home/mrinmoy/Desktop/PracticeJar files# hadoop fs -cat /user/root/listouput/world9/best-r-00000

Best answer

The output files are empty because you forgot to close mos; you need to override the cleanup method of your reducer:

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    mos.close();
}

For more on hadoop - MultipleOutputs in Hadoop with the newer API, see the similar question we found on Stack Overflow: https://stackoverflow.com/questions/25846240/
