gpt4 book ai didi

hadoop - 此处的 mapreduce 代码生成一个空的输出文件。代码和输入如下

转载 作者:行者123 更新时间:2023-12-02 21:06:15 24 4
gpt4 key购买 nike

此处的 mapreduce 代码生成一个空的输出文件。代码和输入如下所示。

package temperature;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;


import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class TemperatureMapper extends Mapper<Text, Text, Text, IntWritable> {

@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
if (isValueValid(value.toString())) {
Text key2 = new Text(getStateFromValue(value.toString()));
IntWritable value2 = new IntWritable(getTemperatureFrom(value.toString()));
context.write(key2, value2);
}
}

private boolean isValueValid(final String value) {
// We expect that the value is a String in the form of : State, Temperature. E.g. MP,77
Pattern p = Pattern.compile("\\S\\S\\,\\d+");
Matcher m = p.matcher(value);
return m.matches();
}

private String getStateFromValue(final String value) {
final String[] subvalues = value.split("\\,");
return subvalues[0];
}

private int getTemperatureFrom(final String value) {
final String[] subvalues = value.split("\\,");
return Integer.parseInt(subvalues[1]);
}
}

public class TemperatureReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

@Override
protected void reduce(final Text key, final Iterable<IntWritable> values, final Context context) throws IOException, InterruptedException {
int sumOfTemperatures = 0;
int nbValues = 0;
int average=0;
for (IntWritable temperature : values) {
sumOfTemperatures += temperature.get();
nbValues++;
}
average = sumOfTemperatures / nbValues;
context.write(key, new IntWritable(average));
}
}
public class average {

public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
/*if (otherArgs.length != 2) {
System.err.println("Usage: Main <in> <out>");
System.exit(-1);
}*/
Job job = new Job(conf, "Calculate average Temperature");
job.setInputFormatClass(KeyValueTextInputFormat.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

job.setJarByClass(average.class);

job.setMapperClass(TemperatureMapper.class);
job.setReducerClass(TemperatureReducer.class);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

System.exit(job.waitForCompletion(true) ? 0 : -1);
}

}

该代码适用于输入:
Ujjain  MP,77
Bhopal MP,76
Indore MP,72
Raipur CG,72
Durg CG,75
Raigarth CG,70
Kendujhar OR,69
Bhubaneswar OR,71
Puri OR,76

但不适用于一些随机输入,例如:
hello  VI,6
bye RE,2

它宁可产生一个空的输出文件。

最佳答案

修改您的正则表达式以支持这种输入

    Pattern p = Pattern.compile("[a-zA-Z]*\\s*[a-zA-Z]{2},\\d+$");

此外,您将需要再次拆分才能获得状态
String[] subvalues = value.split("\\,")[0].split(" ");
return subvalues[subvalues.length - 1];

我希望它有所帮助。在我这边,我不得不更改值 LongWritable 中的键类型,我不确定为什么我们这边没有提示,可能是不同的 api 版本
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

关于hadoop - 此处的 mapreduce 代码生成一个空的输出文件。代码和输入如下,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/41805710/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com