
java - org.apache.hadoop.io.Text cannot be cast to org.apache.hive.hcatalog.data.HCatRecord

Reposted · Author: 行者123 · Updated: 2023-12-02 22:05:19

I wrote a script that fetches data from HBase, parses it, and then saves it into Hive, but I am getting this error:

org.apache.hadoop.mapred.YarnChild: Exception running child : java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to org.apache.hive.hcatalog.data.HCatRecord
at org.apache.hive.hcatalog.mapreduce.FileRecordWriterContainer.write(FileRecordWriterContainer.java:53)
at org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.write(ReduceTask.java:558)
at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
at org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer$Context.write(WrappedReducer.java:105)
at org.apache.hadoop.mapreduce.Reducer.reduce(Reducer.java:150)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)

I know the problem is some silly mismatch between the reducer key/value types and job.setOutputKeyClass / job.setOutputValueClass, but I cannot find it :(. Please help me; here is my code:
public class DumpProductViewsAggHive extends Configured implements Tool {

    public static enum LOCAL_COUNTER {
        IGNORED, VALID, INVALID
    }

    private static final String NAME = "DumpProductViewsAggHive"; //Change the name of the job here
    private static final String SEPARATOR = "/t"; //Change the separator here

    private String dateFrom;   //Start date - HBase MR applicable
    private String dateTo;     //Ending date - HBase MR applicable
    private String fileOutput; //output file path
    private String table = "we_json"; //default HBase table
    private int caching = 500;        //default HBase caching

    /**
     * Map phase HBase
     */
    public static class MapHBase extends TableMapper<Text, Text> {
        private Text key_out = new Text();
        private Text value_out = new Text();

        private JSONParser parser = new JSONParser();
        private DateFormat formatter = new SimpleDateFormat("yyyyMMdd");
        private String day;
        private Date date = new Date();
        private Double ts = new Double(0);

        public void map(ImmutableBytesWritable row, Result value,
                Context context) throws IOException, InterruptedException {

            String b = new String(value.getValue(Bytes.toBytes("d"),
                    Bytes.toBytes("j")));
            JSONObject obj;

            try {
                obj = (JSONObject) parser.parse(b);
                if (obj.get("e").equals("pview_bcn")) {
                    ts = Double.parseDouble(obj.get("ts").toString());
                    ts = ts * 1000;
                    date.setTime(Math.round(ts));
                    day = formatter.format(date);

                    key_out.set(obj.get("sid").toString());
                    value_out.set(obj.get("variant_id") + SEPARATOR + obj.get("shop")
                            + SEPARATOR + obj.get("status") + SEPARATOR + day
                            + SEPARATOR + "D");
                    context.getCounter(LOCAL_COUNTER.VALID).increment(1);
                    context.write(key_out, value_out);
                } else {
                    context.getCounter(LOCAL_COUNTER.IGNORED).increment(1);
                }
            } catch (Exception pe) {
                // ignore value
                context.getCounter(LOCAL_COUNTER.INVALID).increment(1);
                return;
            }
        }
    }

    /**
     * Reduce phase
     */
    public static class Reduce extends Reducer<Text, Text, NullWritable, HCatRecord> {

        public void reduce (Iterable<Text> key, Text value, Context context)
                throws IOException, InterruptedException {

            Set<Text> sidSet = new HashSet<Text>();
            while (key.iterator().hasNext()) {
                sidSet.add(key.iterator().next());
            }
            String[] tokens = value.toString().split(SEPARATOR);

            HCatRecord record = new DefaultHCatRecord(6);
            record.set(0, tokens[0].toString());
            record.set(1, tokens[1].toString());
            record.set(2, tokens[2].toString());
            record.set(3, tokens[3].toString());
            record.set(4, tokens[4].toString());
            record.set(5, sidSet.size());
            context.write(NullWritable.get(), record);
        }
    }

    public void getParams(String[] otherArgs) throws ParseException {
        DateFormat formatter = new SimpleDateFormat("yyyyMMdd");
        Calendar cal = Calendar.getInstance();
        int i = 0;

        /*
         * Loop parameters
         */
        while (i < otherArgs.length) {
            // get parameter -d query only one day. HBase applicable.
            if (otherArgs[i].equals("-d")) {
                cal.setTime(formatter.parse(otherArgs[++i]));
                dateFrom = Long.toHexString(cal.getTimeInMillis() / 1000);
                cal.add(Calendar.DATE, 1);
                dateTo = Long.toHexString(cal.getTimeInMillis() / 1000);
                System.out.println("Day translated to start: " + dateFrom + "; End: " + dateTo);
            }
            // get start date -f parameter. HBase applicable.
            if (otherArgs[i].equals("-f")) {
                cal.setTime(formatter.parse(otherArgs[++i]));
                dateFrom = Long.toHexString(cal.getTimeInMillis() / 1000);
                System.out.println("From: " + dateFrom);
            }
            // get end date -t parameter. HBase applicable.
            if (otherArgs[i].equals("-t")) {
                cal.setTime(formatter.parse(otherArgs[++i]));
                dateTo = Long.toHexString(cal.getTimeInMillis() / 1000);
                System.out.println("To: " + dateTo);
            }

            // get output folder -o parameter.
            if (otherArgs[i].equals("-o")) {
                fileOutput = otherArgs[++i];
                System.out.println("Output: " + fileOutput);
            }

            // get caching -c parameter. HBase applicable.
            if (otherArgs[i].equals("-c")) {
                caching = Integer.parseInt(otherArgs[++i]);
                System.out.println("Caching: " + caching);
            }

            // get table name -tab parameter. HBase applicable.
            if (otherArgs[i].equals("-tab")) {
                table = otherArgs[++i];
                System.out.println("Table: " + table);
            }

            i++;
        }
    }

    /**
     *
     * @param fileInput
     * @param dateFrom
     * @param dateTo
     * @param job
     * @param caching
     * @param table
     * @throws IOException
     */
    public void getInput(String fileInput, String dateFrom, String dateTo, Job job, int caching, String table) throws IOException {
        // If the source is from HBase
        if (fileInput == null) {
            /**
             * HBase source
             */
            // If date is not defined
            if (dateFrom == null || dateTo == null) {
                System.err.println("Start date or End Date is not defined.");
                return;
            }
            System.out.println("HBase table used as a source.");
            Scan scan = new Scan(Bytes.toBytes(dateFrom), Bytes.toBytes(dateTo));
            scan.setCaching(caching);   // set Caching, when the table is small it is better to use bigger number. Default scan is 1
            scan.setCacheBlocks(false); // do not set true for MR jobs
            scan.addColumn(Bytes.toBytes("d"), Bytes.toBytes("j"));

            TableMapReduceUtil.initTableMapperJob(
                    table,          //name of table
                    scan,           //instance of scan
                    MapHBase.class, //mapper class
                    Text.class,     //mapper output key
                    Text.class,     //mapper output value
                    job);
        }
    }

    /**
     * Tool implementation
     */
    @SuppressWarnings("deprecation")
    @Override
    public int run(String[] args) throws Exception {

        // Create configuration
        Configuration conf = this.getConf();
        String databaseName = null;
        String tableName = "test";

        // Parse arguments
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        getParams(otherArgs);

        // It is better to specify zookeeper quorum in CLI parameter -D hbase.zookeeper.quorum=zookeeper servers
        conf.set("hbase.zookeeper.quorum",
                "cz-dc1-s-132.mall.local,cz-dc1-s-133.mall.local,"
                + "cz-dc1-s-134.mall.local,cz-dc1-s-135.mall.local,"
                + "cz-dc1-s-136.mall.local");

        // Create job
        Job job = Job.getInstance(conf, NAME);
        job.setJarByClass(DumpProductViewsAggHive.class);

        // Setup MapReduce job
        job.setReducerClass(Reducer.class);
        //job.setNumReduceTasks(0); // If reducer is not needed

        // Specify key / value
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(DefaultHCatRecord.class);

        // Input
        getInput(null, dateFrom, dateTo, job, caching, table);

        // Output
        // Ignore the key for the reducer output; emitting an HCatalog record as value
        job.setOutputFormatClass(HCatOutputFormat.class);

        HCatOutputFormat.setOutput(job, OutputJobInfo.create(databaseName, tableName, null));
        HCatSchema s = HCatOutputFormat.getTableSchema(job);
        System.err.println("INFO: output schema explicitly set for writing:" + s);
        HCatOutputFormat.setSchema(job, s);

        // Execute job and return status
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Main
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new DumpProductViewsAggHive(), args);
        System.exit(res);
    }

}

Best Answer

Similarly to the question I answered a few minutes ago, you have defined the reducer incorrectly. It should be:

@Override
public void reduce (Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException

Please use the @Override annotation so that the compiler catches this mistake for you.
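Because the posted reduce(Iterable&lt;Text&gt;, Text, Context) does not match the framework's signature, it never overrides Reducer.reduce(); the inherited identity implementation runs instead and writes the mapper's Text values straight to HCatOutputFormat, which is what produces the ClassCastException. For illustration only, here is a minimal sketch of the reducer with the corrected signature. It reuses the field layout from the question's code, but the actual aggregation (what to count per sid) is an assumption, not a verified fix:

    // Sketch only: corrected signature; aggregation logic is assumed, adapt to the real requirements.
    public static class Reduce extends Reducer<Text, Text, NullWritable, HCatRecord> {

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {

            // Count the value records seen for this sid and keep the last one for the output fields.
            int viewCount = 0;
            String[] tokens = null;
            for (Text value : values) {
                tokens = value.toString().split(SEPARATOR);
                viewCount++;
            }
            if (tokens == null || tokens.length < 5) {
                return; // nothing usable to emit for this key
            }

            HCatRecord record = new DefaultHCatRecord(6);
            record.set(0, tokens[0]);
            record.set(1, tokens[1]);
            record.set(2, tokens[2]);
            record.set(3, tokens[3]);
            record.set(4, tokens[4]);
            record.set(5, viewCount);
            context.write(NullWritable.get(), record);
        }
    }

Note also that the run() method in the question registers the base class with job.setReducerClass(Reducer.class); it needs to reference the custom class, job.setReducerClass(Reduce.class), otherwise the identity reducer is used regardless of how the reduce() signature is written.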

Regarding "java - org.apache.hadoop.io.Text cannot be cast to org.apache.hive.hcatalog.data.HCatRecord", a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/24575639/
