
java - Cannot run word count on Hadoop


I am trying to run a Hadoop word-count job from Eclipse, but something is wrong with it; I cannot even debug it.

package test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class test {

    // Shared comparator: orders "word|count" strings by their numeric count,
    // ascending, so index 0 always holds the smallest entry of the current top 5.
    // A plain Collections.sort() would compare lexicographically, not numerically.
    private static final Comparator<String> BY_COUNT = new Comparator<String>() {
        @Override
        public int compare(String a, String b) {
            return Integer.compare(Integer.parseInt(a.split("\\|")[1]),
                                   Integer.parseInt(b.split("\\|")[1]));
        }
    };

    public static class Map2 extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Input lines look like "word count"; re-encode each as "word|count".
            // The separator must be the literal "|": the original "\\|" embedded
            // a stray backslash in the value.
            String[] parts = value.toString().split(" ");
            if (parts.length < 2) {
                return; // skip malformed lines
            }
            // A single constant key sends every record to one reduce group,
            // which a global top 5 requires.
            context.write(new Text(" "), new Text(parts[0] + "|" + parts[1]));
        }
    }

    // A combiner's output types must match the mapper's output types (Text, Text),
    // because combiner output is fed to the reducer; the original declaration
    // Reducer<Text, Text, Text, IntWritable> broke that contract.
    public static class Combine extends Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            ArrayList<String> top5array = new ArrayList<String>();
            // values can only be traversed once, so keep a single iterator
            // instead of calling values.iterator() on every loop test.
            Iterator<Text> it = values.iterator();

            // Seed the list with the first five values. (The original check
            // top5array.get(4) == null throws IndexOutOfBoundsException on an
            // empty list.)
            while (top5array.size() < 5 && it.hasNext()) {
                top5array.add(it.next().toString());
            }
            Collections.sort(top5array, BY_COUNT);

            // Replace the current minimum whenever a later value beats it,
            // then restore ascending order.
            while (it.hasNext()) {
                String current = it.next().toString();
                int currentnum = Integer.parseInt(current.split("\\|")[1]);
                if (currentnum > Integer.parseInt(top5array.get(0).split("\\|")[1])) {
                    top5array.set(0, current);
                    Collections.sort(top5array, BY_COUNT);
                }
            }

            // A combiner must re-emit its survivors or the reducer sees nothing;
            // the original reduce never called context.write.
            for (String s : top5array) {
                context.write(key, new Text(s));
            }
        }
    }

    public static class Reduce2 extends Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Same top-5 selection as the combiner.
            ArrayList<String> top5array = new ArrayList<String>();
            Iterator<Text> it = values.iterator();

            while (top5array.size() < 5 && it.hasNext()) {
                top5array.add(it.next().toString());
            }
            Collections.sort(top5array, BY_COUNT);

            while (it.hasNext()) {
                String current = it.next().toString();
                int currentnum = Integer.parseInt(current.split("\\|")[1]);
                if (currentnum > Integer.parseInt(top5array.get(0).split("\\|")[1])) {
                    top5array.set(0, current);
                    Collections.sort(top5array, BY_COUNT);
                }
            }

            // Concatenate whatever made the cut (there may be fewer than five).
            StringBuilder top5 = new StringBuilder();
            for (String s : top5array) {
                top5.append(s).append(' ');
            }
            context.write(new Text(" "), new Text(top5.toString().trim()));
        }
    }





    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "top 5 word count");
        // Without setJarByClass, Hadoop cannot locate the job jar when the
        // job is submitted to a cluster.
        job.setJarByClass(test.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(Map2.class);
        job.setCombinerClass(Combine.class);
        job.setReducerClass(Reduce2.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

When I run it, the console shows the following message (note that it is a warning, not an exception):

WARN  [main] util.NativeCodeLoader (NativeCodeLoader.java:<clinit>(62))
- Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

How can I solve this problem?

Best Answer

  • Add your Hadoop jars to the project.
  • If you have already configured Hadoop, you can point HDFS at Eclipse. To do this, you need to include the dependencies.
  • Add the Hadoop dependencies inside pom.xml (if you are using Maven), and add the third-party Eclipse plugin; Here is a guide. These enable the Map/Reduce perspective in Eclipse. I added the following dependencies to my project:
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-core</artifactId>
      <version>1.2.1</version>
      <scope>compile</scope>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.6.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.6.0</version>
    </dependency>
  • You will see that the dependencies themselves contain the Hadoop jars. It is then up to you whether to use the existing configuration shipped with the jars or the default one.
  • Now try running your Hadoop driver class. You can debug the code in Eclipse easily, and your Hadoop perspective is enabled as well; you can add your HDFS path there. For hitting breakpoints, see the sketch after this list.

  • You can also check this for remote debugging.
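
Breakpoints in the mapper and reducer are only hit if the whole job runs inside the Eclipse JVM. Below is a minimal sketch of forcing that, assuming Hadoop 2.x property names; the class name, job name, and the file:/// scheme are illustrative, not from the original post:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class DebugDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Run map and reduce tasks inside this JVM so Eclipse breakpoints fire.
        conf.set("mapreduce.framework.name", "local");
        // Read and write the local file system instead of HDFS while debugging.
        conf.set("fs.defaultFS", "file:///");
        // To reuse an existing cluster configuration instead, you could call
        // conf.addResource(...) with your core-site.xml / hdfs-site.xml paths.
        Job job = Job.getInstance(conf, "top5-debug");
        // ...configure the mapper, combiner, reducer, and input/output paths
        // exactly as in the question's driver, then:
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}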

    Regarding java - cannot run word count on Hadoop, a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/31298383/
