
Hadoop IO error: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received RegexMatcher.CustomKey


I am getting the following error:

java.lang.Exception: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received RegexMatcher.CustomKey
at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
Caused by: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received RegexMatcher.CustomKey

First, I defined a custom data type named CustomKey for my MapReduce job:

public class CustomKey implements Writable {

    public Text userId;
    public Text friendId;

    public CustomKey() {
        this.userId = new Text();
        this.friendId = new Text();
    }

    public CustomKey(String userId, String friendId) {
        this.userId = new Text(userId);
        this.friendId = new Text(friendId);
    }

    @Override
    public void write(DataOutput out) throws IOException {
        userId.write(out);
        friendId.write(out); // was userId.write(out) twice, which corrupts serialization
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        userId.readFields(in);
        friendId.readFields(in);
    }
}
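Note that a class used as a map output key in Hadoop has to implement WritableComparable rather than plain Writable, because the framework sorts map output by key; a Writable-only key typically fails at runtime. Below is a minimal sketch of a comparable version of CustomKey; the compareTo, hashCode, and equals logic (userId first, then friendId) is an assumption added here to illustrate the idea, not code from the question:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

public class CustomKey implements WritableComparable<CustomKey> {

    public Text userId = new Text();
    public Text friendId = new Text();

    @Override
    public void write(DataOutput out) throws IOException {
        userId.write(out);
        friendId.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        userId.readFields(in);
        friendId.readFields(in);
    }

    // Sort by userId first, then friendId (an assumed ordering; adjust as needed).
    @Override
    public int compareTo(CustomKey other) {
        int cmp = userId.compareTo(other.userId);
        return (cmp != 0) ? cmp : friendId.compareTo(other.friendId);
    }

    // hashCode drives the default HashPartitioner; equals must stay consistent with it.
    @Override
    public int hashCode() {
        return userId.hashCode() * 163 + friendId.hashCode();
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof CustomKey)) return false;
        CustomKey other = (CustomKey) o;
        return userId.equals(other.userId) && friendId.equals(other.friendId);
    }
}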

Then I created a Mapper, SingleClassv2LogMapper:

public static class SingleClassv2LogMapper extends Mapper<Object, Text, CustomKey, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        String regex = conf.get("regex");
        String delimeter = conf.get("delimeter");
        String currentLine = value.toString();
        String tag = RingIdLogParser.parseHashTag(value.toString());
        String body = RingIdLogParser.parseBody(value.toString());
        if (tag != null) {
            if (tag.equals(RegularExpressionBundle.updateMultipleMessageStatus)) {
                CustomKey customKey = RingIdLogParser.parseUserFrinedInfo(body);
                int messageNo = RingIdLogParser.getMessageCount(body);
                context.write(customKey, new IntWritable(messageNo));
            }
        }
    }
}
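A side note on this mapper: it reads regex and delimeter from the job Configuration, but the driver shown further below never sets either value, so conf.get(...) will return null. If the parser actually needs them, they would have to be set on the Configuration before the Job is created. A sketch with placeholder values (the actual pattern and delimiter are not in the question):

Configuration conf = new Configuration();
// Placeholder values; substitute whatever pattern and delimiter the log parser expects.
conf.set("regex", "<your-regex>");
conf.set("delimeter", ",");
Job job = Job.getInstance(conf, "Regex Matcher");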

And a reducer:

public static class SingleClassv2LogReducer extends Reducer<CustomKey, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(CustomKey key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum = sum + value.get();
        }
        String compactUser = key.userId.toString() + " " + key.friendId.toString();
        context.write(new Text(compactUser), new IntWritable(sum));
    }
}

What should I do now? Any help would be appreciated.

The relevant driver code is shown below:

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "Regex Matcher");
job.setJarByClass(SingleClassLogDriverv2.class);
job.setMapperClass(SingleClassv2LogMapper.class);
job.setCombinerClass(SingleClassv2LogCombiner.class);
job.setReducerClass(SingleClassv2LogReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapOutputKeyClass(CustomKey.class);
job.setMapOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
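The combiner is a likely culprit here. SingleClassv2LogCombiner is registered in the driver but not shown; a combiner runs as a mini-reducer over the map output, so both its input and its output key/value types must match the map output types (CustomKey, IntWritable). If it emits Text keys the way SingleClassv2LogReducer does, Hadoop will reject the record with exactly this kind of type-mismatch IOException. A minimal sketch of a type-compatible combiner, assuming it should just pre-sum the per-key counts:

public static class SingleClassv2LogCombiner
        extends Reducer<CustomKey, IntWritable, CustomKey, IntWritable> {

    @Override
    protected void reduce(CustomKey key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        // Emit the same key type the mapper produced; a combiner must not change types.
        context.write(key, new IntWritable(sum));
    }
}

Also note that CustomKey must implement WritableComparable (see the sketch earlier) or ship with a registered RawComparator, since map output keys are sorted before they reach the combiner and reducer.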

Best Answer

I ran into a similar problem while creating a JAR for Map-Reduce through Eclipse (also involving key comparability). Unlike the traditional WordCount program, my job prints roughly 390k numbers in words alongside the numbers themselves; the input is a list of numbers spread across 12 files, which also contains duplicates. The error was:

java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable

After I corrected that, I got the results I expected, aggregated like this:

716900482    Seventy One Crore Sixty Nine Lac Four Hundred Eighty Two only.

I had already developed a Maven-built utility for printing numbers in words, so I explicitly added that JAR to my project.

(Screenshot: Project - CountInWords view)

So, here is my program; it is similar to the WordCount program but serves a different purpose:

package com.whodesire.count;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import com.whodesire.numstats.AmtInWords;

public class CountInWords {

    public static class NumberTokenizerMapper
            extends Mapper<Object, Text, LongWritable, Text> {

        private static final Text theOne = new Text("1");
        private LongWritable longWord = new LongWritable();

        public void map(Object key, Text value, Context context) {
            try {
                StringTokenizer itr = new StringTokenizer(value.toString());
                while (itr.hasMoreTokens()) {
                    longWord.set(Long.parseLong(itr.nextToken()));
                    context.write(longWord, theOne);
                }
            } catch (ClassCastException cce) {
                System.out.println("ClassCastException raised...");
                System.exit(0);
            } catch (IOException | InterruptedException ioe) {
                ioe.printStackTrace();
                System.out.println("IOException | InterruptedException raised...");
                System.exit(0);
            }
        }
    }

    public static class ModeReducerCumInWordsCounter
            extends Reducer<LongWritable, Text, LongWritable, Text> {

        private Text result = new Text();

        // This is the user-defined reduce function, invoked once for each unique key
        public void reduce(LongWritable key, Iterable<Text> values,
                Context context) throws IOException, InterruptedException {

            // The key is a LongWritable; pass it to the AmtInWords constructor as a String
            AmtInWords aiw = new AmtInWords(key.toString());
            result.set(aiw.getInWords());

            // Finally the number and its wording are written to the job output
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        /*
         * All random numbers inside the input files were generated
         * using https://andrew.hedges.name/experiments/random/
         */

        // Load the configuration files and add them to the conf object
        Configuration conf = new Configuration();

        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        Job job = new Job(conf, "CountInWords");

        // Specify the jar which contains the required classes for the job to run
        job.setJarByClass(CountInWords.class);

        job.setMapperClass(NumberTokenizerMapper.class);
        job.setCombinerClass(ModeReducerCumInWordsCounter.class);
        job.setReducerClass(ModeReducerCumInWordsCounter.class);

        // Set the map output key and value classes for the job
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Set the input and output paths
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        // Send all results to a single output file
        job.setNumReduceTasks(1);

        // Submit the job and wait for it to complete
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
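One small observation about the driver above: it never calls setOutputKeyClass or setOutputValueClass, so it relies on Hadoop's defaults (LongWritable for the key, Text for the value), which happen to match this reducer. Declaring them explicitly is safer and makes the contract obvious; a sketch of the extra lines that could go in main() before the job is submitted:

// Declare the reducer's output types explicitly instead of relying on the defaults.
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);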

I know this is a very late reply, but I hope it helps someone else find their way too. Thanks.

Regarding this Hadoop IO error (Type mismatch in key from map: expected org.apache.hadoop.io.Text, received RegexMatcher.CustomKey), a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/36369953/
