
java - How to update MapReduce job parameters from within the Mapper


I want to update, from inside the Mapper class, a parameter that I originally set in the Driver class.

I tried

context.getConfiguration().set("arg", "updatedvalue")

inside the mapper. It does update the value there, but the reducer's output is all zeros.

Please help.

Mapper:

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.TreeMap;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class RecMap extends Mapper<LongWritable, Text, Text, Text> {
    public static TreeMap<String, Integer> co_oc_mat = new TreeMap<String, Integer>();
    public static HashMap<String, Float> user_scoring_mat = new HashMap<String, Float>();
    public static TreeMap<String, Float> sorted_user_scoring_mat = new TreeMap<String, Float>();
    public static ArrayList<String> vals = new ArrayList<String>();
    public static ArrayList<Integer> unique_items = new ArrayList<Integer>();
    public static ArrayList<Integer> unique_users = new ArrayList<Integer>();
    public static int a = 0;

    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        ++a;
        String b = value.toString();
        vals.add(b);
        String[] parts = b.split("\\,");
        user_scoring_mat.put(parts[0] + "," + parts[1], Float.parseFloat(parts[2]));
    }

    @Override
    public void cleanup(Context context) throws IOException, InterruptedException {
        co_oc_mat.putAll(new get_co_oc_mat().get(vals, a));
        unique_users.addAll(new get_unique_users().get(vals, a));
        unique_items.addAll(new get_unique_items().get(vals, a));
        for (int i = 0; i < unique_users.size(); i++) {
            for (int j = 0; j < unique_items.size(); j++) {
                if (!user_scoring_mat.containsKey(unique_users.get(i) + "," + unique_items.get(j))) {
                    user_scoring_mat.put(unique_users.get(i) + "," + unique_items.get(j), 0.0f);
                }
            }
        }
        sorted_user_scoring_mat.putAll(user_scoring_mat);
        String prev = "null"; int row_num = -1; String value = "A";
        String prev2 = "null"; int col_num = -1; String value2 = "B";

        // Transmitting co_oc_mat
        for (Entry<String, Integer> entry : co_oc_mat.entrySet()) {
            String check_val = entry.getKey().split("\\,")[0];
            if (!prev.contentEquals(check_val)) {
                if (row_num == -1) {
                    prev = check_val;
                    ++row_num;
                } else {
                    for (int i = 0; i < unique_users.size(); i++) {
                        String key = row_num + "," + i;
                        context.write(new Text(key), new Text(value));
                    }
                    value = "A";
                    prev = check_val;
                    ++row_num;
                }
            }
            value = value + "," + entry.getValue();
        }
        for (int i = 0; i < unique_users.size(); i++) {
            String key = row_num + "," + i;
            context.write(new Text(key), new Text(value));
        }

        // Transmitting sorted_user_scoring_mat
        for (Entry<String, Float> entry : sorted_user_scoring_mat.entrySet()) {
            //context.write(new Text(entry.getKey()), new Text(String.valueOf(entry.getValue())));
            String check_val = entry.getKey().split("\\,")[0];
            if (!prev2.contentEquals(check_val)) {
                if (col_num == -1) {
                    prev2 = check_val;
                    ++col_num;
                } else {
                    for (int i = 0; i < unique_items.size(); i++) {
                        String key = i + "," + col_num;
                        context.write(new Text(key), new Text(value2));
                    }
                    value2 = "B";
                    prev2 = check_val;
                    ++col_num;
                }
            }
            value2 = value2 + "," + entry.getValue();
        }
        for (int i = 0; i < unique_items.size(); i++) {
            String key = i + "," + col_num;
            context.write(new Text(key), new Text(value2));
        }
        // The attempted update: this only changes the task-local Configuration copy.
        context.getConfiguration().setInt("n", unique_items.size());
    }
}

Reducer:

import java.io.IOException;
import java.util.HashMap;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;


public class RecReduce extends Reducer<Text, Text, Text, Text> {
    public static int n = 0;

    @Override
    public void setup(Context context) throws IOException, InterruptedException {
        n = context.getConfiguration().getInt("n", 1);
    }

    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        String[] value;
        HashMap<Integer, Float> hashA = new HashMap<Integer, Float>();
        HashMap<Integer, Float> hashB = new HashMap<Integer, Float>();
        for (Text val : values) {
            value = val.toString().split(",");
            if (value[0].equals("A")) {
                for (int z = 1; z <= n; z++) {
                    hashA.put(z, Float.parseFloat(value[z]));
                }
            } else {
                for (int a = 1; a <= n; a++) {
                    hashB.put(a, Float.parseFloat(value[a]));
                }
            }
        }
        float result = 0.0f;
        float a_ij;
        float b_jk;
        for (int j = 1; j <= n; j++) {
            a_ij = hashA.containsKey(j) ? hashA.get(j) : 0.0f;
            b_jk = hashB.containsKey(j) ? hashB.get(j) : 0.0f;
            result += a_ij * b_jk;
        }
        context.write(null, new Text(key.toString() + "," + Float.toString(result)));
    }
}

Driver:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class RecDriver {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        conf.setInt("n", 0);
        Job job = new Job(conf, "Recommendations_CollaborativeFiltering");
        job.setJarByClass(RecDriver.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(RecMap.class);
        //job.setNumReduceTasks(0);
        //Don't use a combiner if there is no scope for combining the output. Otherwise the job will get stuck.
        //job.setCombinerClass(RecReduce.class);
        job.setReducerClass(RecReduce.class);

        FileInputFormat.addInputPath(job, new Path("/home/gts1/Desktop/recommendation.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/home/gts1/Desktop/rec1_out"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

This is the output I get:

0,0,0.0
0,1,0.0
0,2,0.0
0,3,0.0
0,4,0.0
1,0,0.0
1,1,0.0
1,2,0.0
1,3,0.0
1,4,0.0
2,0,0.0
2,1,0.0
2,2,0.0
2,3,0.0
2,4,0.0
3,0,0.0
3,1,0.0
3,2,0.0
3,3,0.0
3,4,0.0

Best Answer

As stated in the Hadoop API documentation, JobContext (and hence the Context handed to tasks) provides a read-only view of the job to the tasks while they are running. So it is possible to get parameter values from the context inside mapper/reducer methods, but not to set them: a set() call only changes that task's local copy of the Configuration and is never propagated back to the job or to the other tasks. That is why the reducer still sees n as the value set in the Driver (0), the loops in reduce() never run, and every result comes out as 0.0.
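
A minimal sketch of that one-way flow (the class below is hypothetical, not part of the question's code): a value placed in the Configuration by the Driver before submission, e.g. conf.setInt("n", ...), is visible to every task, while a set() performed inside a task stays local to that task's JVM.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper illustrating the one-way flow of job parameters.
public class ConfigReadMapper extends Mapper<LongWritable, Text, Text, Text> {
    private int n;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Reading works: this returns whatever the Driver put into the
        // Configuration before job submission.
        n = context.getConfiguration().getInt("n", 1);
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Writing does not: this only mutates the task-local Configuration
        // copy; the Driver and the reducers never see the new value.
        context.getConfiguration().setInt("n", n + 1);
        context.write(new Text("n"), new Text(String.valueOf(n)));
    }
}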

When this kind of coordination between processes running on different machines is required, something like Apache ZooKeeper has to be used to set the value from the mapper and read the same value back in the reducer.
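
A rough sketch of that route, under the assumption of a reachable ZooKeeper ensemble at localhost:2181 and a made-up znode path /rec_n (a production version would wait for the session connection event and handle retries). The mapper could call ZkParam.publish(unique_items.size()) from cleanup() and the reducer ZkParam.read() from setup().

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;

// Hypothetical helper: the mapper publishes a value to a znode,
// the reducer reads it back.
public class ZkParam {
    private static final String CONNECT = "localhost:2181"; // assumed ensemble
    private static final String PATH = "/rec_n";             // assumed znode

    public static void publish(int n)
            throws IOException, KeeperException, InterruptedException {
        ZooKeeper zk = new ZooKeeper(CONNECT, 3000, event -> { });
        byte[] data = String.valueOf(n).getBytes(StandardCharsets.UTF_8);
        if (zk.exists(PATH, false) == null) {
            zk.create(PATH, data, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        } else {
            zk.setData(PATH, data, -1); // -1 = ignore the znode version
        }
        zk.close();
    }

    public static int read()
            throws IOException, KeeperException, InterruptedException {
        ZooKeeper zk = new ZooKeeper(CONNECT, 3000, event -> { });
        byte[] data = zk.getData(PATH, false, null);
        zk.close();
        return Integer.parseInt(new String(data, StandardCharsets.UTF_8));
    }
}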

Regarding "java - How to update MapReduce job parameters from within the Mapper", a similar question was found on Stack Overflow: https://stackoverflow.com/questions/31021635/
