gpt4 book ai didi

java - 无法获得简单 Hadoop mapreduce 程序所需的输出

转载 作者:行者123 更新时间:2023-12-02 21:32:03 24 4
gpt4 key购买 nike

我正在尝试编写这个 mapreduce 程序,它必须从两个文件中获取输入,一个具有职业和状态的详细信息,另一个具有职业和工作增长百分比的详细信息。我使用两个映射器并将它们组合起来,并在我的 reducer 中尝试查看哪些工作的增长率超过 30。理想情况下,我的输出应该是职业,然后是州列表。然而,我只得到职业名称而不是州。我在下面发布了代码和示例输入文件。请指出我做错了什么。谢谢。
(请注意,我提供的输入文件样本只是实际文件的一小部分)。

package com;

import java.io.IOException;

//import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


public class GrowthState extends Configured implements Tool {

//Parser for Mapper1
public static class StateParser{

private String State,Occupation;

public void parse(String record){

String str[] = record.split("\t");
if(str[4].length() != 0)
setOccupation(str[4]);
else
setOccupation("Default Occupation");

if(str[2].length() != 0)
setState(str[2]);
else
setState("Default State");

}

public void parse(Text record){
parse(record.toString());
}

public String getState() {
return State;
}

public void setState(String state) {
State = state;
}

public String getOccupation() {
return Occupation;
}

public void setOccupation(String occupation) {
Occupation = occupation;
}
}

//Mapper1 - Processing state.txt
public static class GrowthMap1 extends Mapper<LongWritable,Text,Text,Text>{
StateParser sp = new StateParser();
Text outkey = new Text();
Text outvalue = new Text();
public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException{
sp.parse(value);
outkey.set(sp.getOccupation());
outvalue.set("m1\t"+sp.getState());
context.write(outkey,outvalue);
//String str[] = value.toString().split("\t");
//context.write(new Text(str[2]), new Text("m1\t"+str[4]));
}
}

public static class ProjParser{
private String Occupation,percent;

public void parse(String record){
String str[] = record.split("\t");
if(str[0].length() != 0)
setOccupation(str[0]);
else
setOccupation("Default Occupation");

if(str[5].length() != 0)
setPercent(str[5]);
else
setPercent("0");
}

public void parse(Text record){
parse(record.toString());
}

public String getOccupation() {
return Occupation;
}

public void setOccupation(String occupation) {
Occupation = occupation;
}

public String getPercent() {
return percent;
}

public void setPercent(String percent) {
this.percent = percent;
}
}

//Mapper2 - processing projection.txt
public static class GrowthMap2 extends Mapper<LongWritable,Text,Text,Text> {
ProjParser pp = new ProjParser();
Text outkey = new Text();
Text outvalue = new Text();
public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException{
pp.parse(value);
outkey.set(pp.getOccupation());
outvalue.set("m2\t"+pp.getPercent());
context.write(outkey, outvalue);
//String str[] = value.toString().split("\t");
//context.write(new Text(str[0]), new Text("m2\t"+str[5]));
}
}

//Reducer
public static class GrowthReduce extends Reducer<Text,Text,Text,Text>{
Text outvalue = new Text();
public void reduce(Text key,Iterable<Text> value,Context context)throws IOException, InterruptedException{
float cent = 0;
String state = "";
for(Text values : value){
String[] str = values.toString().split("\t");
if(str[0].equals("m1")){
state = state + " " + str[1];
}else if(str[0].equals("m2")){
try{
cent = Float.parseFloat(str[1]);
}catch(Exception nf){
cent = 0;
}
}
}
if(cent>=30){
outvalue.set(state);
context.write(key,outvalue );
}
}
}

//Driver

@Override
public int run(String[] args) throws Exception {

Job job = new Job(getConf(), "States of Growth");

job.setJarByClass(GrowthState.class);
job.setReducerClass(GrowthReduce.class);

MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, GrowthMap1.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, GrowthMap2.class);

FileOutputFormat.setOutputPath(job,new Path(args[2]));

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

return job.waitForCompletion(true)?0:1;
}

public static void main(String args[]) throws Exception{

int exitcode = ToolRunner.run(new GrowthState(), args);
System.exit(exitcode);
}

}

示例输入文件 1:
01  AL  Alabama 00-0000 All Occupations total   "1,857,530" 0.4 1000.000    1.00    19.66   "40,890"    0.5 8.30    9.72    14.83   23.95   36.04   "17,260"    "20,220"    "30,850"    "49,810"    "74,950"        
01 AL Alabama 11-0000 Management Occupations major "67,500" 1.1 36.338 0.73 51.48 "107,080" 0.6 24.54 33.09 44.98 62.09 88.43 "51,050" "68,830" "93,550" "129,150" "183,940"
01 AL Alabama 11-1011 Chief Executives detailed "1,080" 4.8 0.580 0.32 97.67 "203,150" 2.5 52.05 67.58 # # # "108,270" "140,570" # # #
01 AL Alabama 11-1021 General and Operations Managers detailed "26,480" 1.5 14.258 0.94 58.00 "120,640" 0.9 27.65 35.76 49.00 71.44 # "57,510" "74,390" "101,930" "148,590" #
01 AL Alabama 11-1031 Legislators detailed "1,470" 8.7 0.790 1.94 * "21,920" 3.5 * * * * * "16,120" "17,000" "18,450" "20,670" "32,820" TRUE
01 AL Alabama 11-2011 Advertising and Promotions Managers detailed 80 16.3 0.042 0.19 44.88 "93,350" 9.5 21.59 30.28 38.92 52.22 74.07 "44,900" "62,980" "80,960" "108,620" "154,060"
01 AL Alabama 11-2021 Marketing Managers detailed 610 11.5 0.329 0.24 61.28 "127,460" 7.4 31.96 37.63 53.39 73.17 # "66,480" "78,280" "111,040" "152,200" #
01 AL Alabama 11-2022 Sales Managers detailed "2,330" 5.4 1.253 0.47 54.63 "113,620" 2.2 27.28 35.42 48.92 67.62 89.42 "56,740" "73,660" "101,750" "140,640" "186,000"
05 AR Arkansas 43-4161 "Human Resources Assistants, Except Payroll and Timekeeping" detailed "1,470" 6.6 1.265 1.26 17.25 "35,870" 1.5 11.09 13.54 17.11 20.74 23.30 "23,060" "28,170" "35,590" "43,150" "48,450"
05 AR Arkansas 43-4171 Receptionists and Information Clerks detailed "7,080" 3.3 6.109 0.84 11.26 "23,420" 0.8 8.14 9.19 10.87 13.09 14.94 "16,940" "19,110" "22,600" "27,230" "31,070"
05 AR Arkansas 43-4181 Reservation and Transportation Ticket Agents and Travel Clerks detailed 590 23.6 0.510 0.50 12.61 "26,220" 6.1 8.99 9.81 10.88 14.82 20.59 "18,710" "20,400" "22,630" "30,830" "42,830"
05 AR Arkansas 43-4199 "Information and Record Clerks, All Other" detailed 920 4.7 0.795 0.61 18.45 "38,370" 1.8 13.59 15.33 18.49 21.35 23.86 "28,270" "31,880" "38,470" "44,410" "49,630"
05 AR Arkansas 43-5011 Cargo and Freight Agents detailed 480 16.5 0.418 0.73 * * * * * * * * * * * * *
05 AR Arkansas 43-5021 Couriers and Messengers detailed 510 12.4 0.444 0.84 11.92 "24,790" 2.1 8.73 9.91 11.26 13.49 16.03 "18,160" "20,620" "23,420" "28,060" "33,350"

示例输入文件 2:
Management occupations  11-0000 "8,861.5"   "9,498.0"   636.6   7.2 22.2    "2,586.7"   "$93,910"   —   —   — 
Top executives 11-1000 "2,361.5" "2,626.8" 265.2 11.2 3.3 717.4 "$99,550" — — —
Chief executives 11-1011 330.5 347.9 17.4 5.3 17.7 87.8 "$168,140" Bachelor's degree 5 years or more None
General and operations managers 11-1021 "1,972.7" "2,216.8" 244.1 12.4 1.0 613.1 "$95,440" Bachelor's degree Less than 5 years None
Legislators 11-1031 58.4 62.1 3.7 6.4 — 16.5 "$19,780" Bachelor's degree Less than 5 years None
"Advertising, marketing, promotions, public relations, and sales managers" 11-2000 637.4 700.5 63.1 9.9 3.4 203.3 "$107,950" — — —
Advertising and promotions managers 11-2011 35.5 38.0 2.4 6.9 17.8 13.4 "$88,590" Bachelor's degree Less than 5 years None
Marketing and sales managers 11-2020 539.8 592.5 52.7 9.8 2.6 168.6 "$110,340" — — —
Marketing managers 11-2021 180.5 203.4 22.9 12.7 2.6 61.7 "$119,480" Bachelor's degree 5 years or more None
Sales managers 11-2022 359.3 389.0 29.8 8.3 2.7 106.9 "$105,260" Bachelor's degree Less than 5 years None
Public relations and fundraising managers 11-2031 62.1 70.1 8.0 12.9 1.6 21.3 "$95,450" Bachelor's degree 5 years or more None
Operations specialties managers 11-3000 "1,647.5" "1,799.7" 152.1 9.2 3.3 459.1 "$100,720" — — —
Administrative services managers 11-3011 280.8 315.0 34.2 12.2 0.1 79.9 "$81,080" Bachelor's degree Less than 5 years None
Computer and information systems managers 11-3021 332.7 383.6 50.9 15.3 3.1 97.1 "$120,950" Bachelor's degree 5 years or more None
Financial managers 11-3031 532.1 579.2 47.1 8.9 5.1 146.9 "$109,740" Bachelor's degree 5 years or more None
Industrial production managers 11-3051 172.7 168.6 -4.1 -2.4 6.1 31.4 "$89,190" Bachelor's degree 5 years or more None
Purchasing managers 11-3061 71.9 73.4 1.5 2.1 0.3 17.3 "$100,170" Bachelor's degree 5 years or more None
"Transportation, storage, and distribution managers" 11-3071 105.2 110.3 5.1 4.9 4.8 29.1 "$81,830" High school diploma or equivalent 5 years or more None
Compensation and benefits managers 11-3111 20.7 21.4 0.6 3.1 — 6.1 "$95,250" Bachelor's degree 5 years or more None
Human resources managers 11-3121 102.7 116.3 13.6 13.2 1.0 40.6 "$99,720" Bachelor's degree 5 years or more None
Training and development managers 11-3131 28.6 31.8 3.2 11.2 — 10.7 "$95,400" Bachelor's degree 5 years or more None
Other management occupations 11-9000 "4,215.0" "4,371.0" 156.1 3.7 43.1 "1,207.0" "$81,940" — — —

最佳答案

你的 reducer 有问题。

错误代码如下所示。下面的循环被调用一个特定键的所有值(例如,对于“广告和促销经理”,它被调用两次。一次是值“阿拉巴马州”,另一次是值“6.9”)。问题是,你把 if(cent >= 30)声明,在 for 之外环形。它应该在里面,用于匹配 key 。

  for(Text values : value){
String[] str = values.toString().split("\t");
if(str[0].equals("m1")){
state = state + " " + str[1];
}else if(str[0].equals("m2")){
try{
cent = Float.parseFloat(str[1]);
}catch(Exception nf){
cent = 0;
}
}
}
if(cent>=30){
outvalue.set(state);
context.write(key,outvalue );
}

以下代码工作正常。
//Reducer
public static class GrowthReduce extends Reducer<Text,Text,Text,Text>{
Text outvalue = new Text();
HashMap<String, String> stateMap = new HashMap<String, String>();


public void reduce(Text key,Iterable<Text> value,Context context)throws IOException, InterruptedException{
float cent = 0;

for(Text values : value){
String[] str = values.toString().split("\t");

if(str[0].equals("m1")){
stateMap.put(key.toString().toLowerCase(), str[1]);
}
else if(str[0].equals("m2")){
try{
cent = Float.parseFloat(str[1]);
if(stateMap.containsKey(key.toString().toLowerCase()))
{
if(cent>30) {
outvalue.set(stateMap.get(key.toString().toLowerCase()));
context.write(key, outvalue);
}
stateMap.remove(key.toString());
}
}catch(Exception nf){
cent = 0;
}
}
}
}
}

逻辑是:
  • 当你遇到一个状态(值“m1”)时,你把它放在状态图中。
  • 下次,当您遇到具有相同键(值“m2”)的百分比时,您检查该状态是否已经在 map 中。如果是,则输出键/值。
  • 关于java - 无法获得简单 Hadoop mapreduce 程序所需的输出,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/33874474/

    24 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com