gpt4 book ai didi

java - Hadoop Mapreduce 作业卡在 map 上 100% 减少 51%

转载 作者:可可西里 更新时间:2023-11-01 15:02:34 30 4
gpt4 key购买 nike

所以,我正在某处寻找一个无限循环,我不知道是否还有其他原因会导致这种情况。我正在使用四个集群节点,所以我很确定不会缺少 RAM,正如其他同类问题中所建议的那样。

我的代码:

package org.myorg;

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import util.hashing.*;



public class LatLong {


public static class Map extends Mapper<Object, Text, Text, Text> {
//private final static IntWritable one = new IntWritable(1);


public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
String[] longLatArray = line.split(",");
double longi = Double.parseDouble(longLatArray[0]);
double lat = Double.parseDouble(longLatArray[1]);
//List<Double> origLatLong = new ArrayList<Double>(2);
//origLatLong.add(lat);
//origLatLong.add(longi);
Geohash inst = Geohash.getInstance();
//encode is the library's encoding function
String hash = inst.encode(lat,longi);
//Using the first 5 characters just for testing purposes
//Need to find the right one later
int accuracy = 4;
//hash of the thing is shortened to whatever I figure out
//to be the right size of each tile
Text shortenedHash = new Text(hash.substring(0,accuracy));
Text origHash = new Text(hash);

context.write(shortenedHash, origHash);
}
}

public static class Reduce extends Reducer<Text, Text, Text, Text> {

private IntWritable totalTileElementCount = new IntWritable();
private Text latlongimag = new Text();
private Text dataSeparator = new Text();

@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
int elementCount = 0;
boolean first = true;
Iterator<Text> it = values.iterator();
String lat = new String();
String longi = new String();
Geohash inst = Geohash.getInstance();

while (it.hasNext()) {
elementCount = elementCount+1;
if(first)
{
double[] doubleArray = (inst.decode(it.next().toString()));
lat = Double.toString(doubleArray[0]);
longi = Double.toString(doubleArray[1]);
first = false;

}



}
totalTileElementCount.set(elementCount);
//Geohash inst = Geohash.getInstance();

String mag = totalTileElementCount.toString();

latlongimag.set(lat+","+ longi +","+mag+",");
dataSeparator.set("");
context.write(latlongimag, dataSeparator );
}
}

public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf, "wordcount");
job.setJarByClass(LatLong.class);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);

job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);

FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

job.waitForCompletion(true);
}

}

最佳答案

内部

while (it.hasNext()) {
elementCount = elementCount+1;
if(first)
{
double[] doubleArray = (inst.decode(it.next().toString()));
lat = Double.toString(doubleArray[0]);
longi = Double.toString(doubleArray[1]);
first = false;
}
}

您设置了 first = false; 所以在接下来的 while (it.hasNext()) 循环迭代中 if(first) 不是进入并且 it.next() 不会再被调用,所以如果 it 有多个元素 it.hasNext() 将始终返回 true你将永远不会离开这个 while 循环。

关于java - Hadoop Mapreduce 作业卡在 map 上 100% 减少 51%,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/24089330/

30 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com