- html - 出于某种原因,IE8 对我的 Sass 文件中继承的 html5 CSS 不友好?
- JMeter 在响应断言中使用 span 标签的问题
- html - 在 :hover and :active? 上具有不同效果的 CSS 动画
- html - 相对于居中的 html 内容固定的 CSS 重复背景?
我试图使用hadoop解决这个问题。
根据平均评分找出排名前十的商家,平均评分最高的商家排在首位。注意 review.csv 文件中的第 4 列表示评分。
我的Java代码是:
package bd;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map.Entry;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * MapReduce job that emits the ten businesses with the highest average rating.
 *
 * <p>Each input line is split on {@code "::"}; the third field is used as the
 * business id and the fourth field as the rating. The mapper emits
 * {@code (businessId, rating)} pairs, the reducer averages the ratings per
 * business and writes the ten best averages when the reduce task finishes.
 */
public class TopTenRatedBusiness {

    /** Separator between the fields of one input line. */
    private static final String FIELD_SEPARATOR = "::";

    /** Zero-based position of the business id within a split line. */
    private static final int BUSINESS_ID_INDEX = 2;

    /** Zero-based position of the rating within a split line. */
    private static final int RATING_INDEX = 3;

    /** Minimum number of fields a line must have to be usable. */
    private static final int MIN_FIELDS = 4;

    /** Number of businesses written to the output. */
    private static final int TOP_N = 10;

    /** Counter group under which skipped input lines are reported. */
    private static final String COUNTER_GROUP = "TopTenRatedBusiness";

    /**
     * Mapper that parses one review line and emits the business id as the key
     * and its rating as the value.
     */
    public static class BusinessRatingMapper extends Mapper<LongWritable, Text, Text, FloatWritable> {

        /**
         * Emits {@code (businessId, rating)} for a well-formed line.
         *
         * <p>Lines with fewer than four fields (for example a header or a blank
         * line) and lines whose rating is not a number are skipped and counted
         * instead of failing the whole task with
         * {@link ArrayIndexOutOfBoundsException} or {@link NumberFormatException}.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of the input file
         * @param context task context used to emit output and update counters
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] reviews = value.toString().split(FIELD_SEPARATOR);
            if (reviews.length < MIN_FIELDS) {
                context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
                return;
            }

            float rating;
            try {
                rating = Float.parseFloat(reviews[RATING_INDEX]);
            } catch (NumberFormatException e) {
                // Deliberately skipped: a bad rating must not abort the job; it is counted instead.
                context.getCounter(COUNTER_GROUP, "NON_NUMERIC_RATINGS").increment(1);
                return;
            }

            context.write(new Text(reviews[BUSINESS_ID_INDEX]), new FloatWritable(rating));
        }
    }

    /**
     * Reducer that computes the average rating of every business it receives
     * and, once all keys are processed, writes the ten highest averages.
     *
     * <p>The ranking is built per reducer instance, so the output is a global
     * top ten only when the job runs with a single reduce task.
     */
    public static class BusinessRatingReducer extends Reducer<Text, FloatWritable, Text, FloatWritable> {

        /**
         * Business ids grouped by average rating, highest rating first.
         * Kept per reducer instance so that no state is shared between tasks
         * running in the same JVM.
         */
        private final TreeMap<Float, List<String>> businessesByRating =
                new TreeMap<Float, List<String>>(Collections.reverseOrder());

        /**
         * Averages the ratings of one business and records the result; nothing
         * is written to the output here.
         *
         * @param key     business id
         * @param values  all ratings emitted for that business
         * @param context task context (unused in this method)
         */
        @Override
        public void reduce(Text key, Iterable<FloatWritable> values, Context context)
                throws IOException, InterruptedException {
            float sumOfRatings = 0;
            int countOfRatings = 0;
            for (FloatWritable value : values) {
                sumOfRatings += value.get();
                countOfRatings++;
            }
            Float averageRating = sumOfRatings / countOfRatings;

            List<String> businessIds = businessesByRating.get(averageRating);
            if (businessIds == null) {
                businessIds = new ArrayList<String>();
                businessesByRating.put(averageRating, businessIds);
            }
            // Copy the id to a String: the Text key object passed in is copied rather than retained.
            businessIds.add(key.toString());
        }

        /**
         * Writes at most ten {@code (businessId, averageRating)} pairs, highest
         * average first. Businesses with the same average are written in the
         * order in which they were reduced.
         *
         * @param context task context used to emit the final records
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            int emitted = 0;
            for (Entry<Float, List<String>> entry : businessesByRating.entrySet()) {
                // Stop as soon as the quota is reached instead of walking the remaining entries.
                if (emitted >= TOP_N) {
                    break;
                }
                FloatWritable average = new FloatWritable(entry.getKey());
                for (String businessId : entry.getValue()) {
                    if (emitted >= TOP_N) {
                        break;
                    }
                    context.write(new Text(businessId), average);
                    emitted++;
                }
            }
        }
    }

    /**
     * Driver program: configures and runs the job.
     *
     * @param args generic Hadoop options followed by the input path and the output path
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException, NoSuchMethodException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: TopTenRatedBusiness <in> <out>");
            System.exit(2);
        }

        // Create a job with name "TopTenRatedBusiness".
        // NOTE(review): the constructor is kept (rather than a factory method) so the
        // driver keeps compiling against the Hadoop release this job was written for.
        Job job = new Job(conf, "TopTenRatedBusiness");
        job.setJarByClass(TopTenRatedBusiness.class);

        job.setMapperClass(BusinessRatingMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FloatWritable.class);

        job.setReducerClass(BusinessRatingReducer.class);
        // The reducer ranks only the keys it sees, so a single reduce task is
        // required for the output to be the overall top ten.
        job.setNumReduceTasks(1);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FloatWritable.class);

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
review.csv file contains the following columns "review_id"::"user_id"::"business_id"::"stars"
'review_id': (a unique identifier for the review)
'user_id': (the identifier of the authoring user),
'business_id': (the identifier of the reviewed business),
'stars': (star rating, integer 1-5),the rating given by the user to a business
17/10/09 21:18:33 INFO input.FileInputFormat: Total input paths to process : 1
17/10/09 21:18:33 INFO util.NativeCodeLoader: Loaded the native-hadoop library
17/10/09 21:18:33 WARN snappy.LoadSnappy: Snappy native library not loaded
17/10/09 21:18:34 INFO mapred.JobClient: Running job: job_201710090351_0033
17/10/09 21:18:35 INFO mapred.JobClient: map 0% reduce 0%
17/10/09 21:18:41 INFO mapred.JobClient: Task Id : attempt_201710090351_0033_m_000000_0, Status : FAILED
java.lang.ArrayIndexOutOfBoundsException: 2
at bd.TopTenRatedBusiness$BusinessRatingMapper.map(TopTenRatedBusiness.java:37)
at bd.TopTenRatedBusiness$BusinessRatingMapper.map(TopTenRatedBusiness.java:26)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:364)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
17/10/09 21:18:47 INFO mapred.JobClient: Task Id : attempt_201710090351_0033_m_000000_1, Status : FAILED
java.lang.ArrayIndexOutOfBoundsException: 2
at bd.TopTenRatedBusiness$BusinessRatingMapper.map(TopTenRatedBusiness.java:37)
at bd.TopTenRatedBusiness$BusinessRatingMapper.map(TopTenRatedBusiness.java:26)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:364)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
17/10/09 21:18:52 INFO mapred.JobClient: Task Id : attempt_201710090351_0033_m_000000_2, Status : FAILED
java.lang.ArrayIndexOutOfBoundsException: 2
at bd.TopTenRatedBusiness$BusinessRatingMapper.map(TopTenRatedBusiness.java:37)
at bd.TopTenRatedBusiness$BusinessRatingMapper.map(TopTenRatedBusiness.java:26)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:364)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
17/10/09 21:18:58 INFO mapred.JobClient: Job complete: job_201710090351_0033
17/10/09 21:18:58 INFO mapred.JobClient: Counters: 7
17/10/09 21:18:58 INFO mapred.JobClient: Job Counters
17/10/09 21:18:58 INFO mapred.JobClient: Launched map tasks=4
17/10/09 21:18:58 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=0
17/10/09 21:18:58 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
17/10/09 21:18:58 INFO mapred.JobClient: Failed map tasks=1
17/10/09 21:18:58 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=23391
17/10/09 21:18:58 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
17/10/09 21:18:58 INFO mapred.JobClient: Data-local map tasks=4
0xuZfa0t4MNWd3eIFF02ug::kT43SxDgMGzbeXpO51f0hQ::wbpbaWBfU54JbjLIDwERQA::5.0
bBqVqhOvNgFs8I1Wk68QUQ::T9hGHsbJW9Hw1cJAlIAWmw::4iTRjN_uAdAb7_YZDVHJdg::5.0
fu7TcxnAOdnbdLcyFhMmZg::Z_WAxc4RUpKp3y12BH1bEg::qw5gR8vW7mSOK4VROSwdMA::4.0
LMy8UOKOeh0b9qrz-s1fQA::OlMjqqzWZUv2-62CSqKq_A::81IjU5L-t-QQwsE38C63hQ::4.0
JjyRj9EiBXQTFDQAxRtt4g::fs5bpfk-2pvq2v8S1De5pQ::Hnz1_h_D1eHSRtQqHSCZkw::2.0
最佳答案
您的代码适用于示例输入。
因此,问题似乎出在您的数据上:其中存在无法按 "::" 拆分出足够字段的错误行。您可以先检查文件中是否有标题行;如果没有,就需要检查完整文件,找出格式不正确的行。
另外,请确认您提供的输入目录中只有 review.csv 这一个文件,没有其他文件。
关于java.lang.ArrayIndexOutOfBoundsException : 2 error in mapreduce, Hadoop,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/46650737/
我创建了一个程序,可以将 10 个长整型数组转换为“电话号码”格式。例如这样:Solution.createPhoneNumber(new int[] {1, 2, 3, 4, 5, 6, 7, 8,
我正在将一个 csv 的每一行与另一个 csv 的每一行进行比较以查找匹配项。然后,我需要添加第二个 csv 中的一些元素和第一个 csv 中的一些元素,并将其写入新文件。它适用于 csv 的第一行,
我正在尝试解析一个字符串以获得 3 个整数,但我有一个强制关闭并且 LogCat 显示:ArrayIndexOutOfBoundExceptions。 这是我的代码的相关部分: dateMod
我已经创建了以下模式来表示用户和一组线程之间的关联,这些线程按他们的最后一条消息排序(用户已经阅读了哪些线程,哪些没有): CREATE TABLE table(user_id bigint, mes
我读取一个文件并将其添加到列表中,然后读取列表并拆分字符串并进行比较并对其进行处理。 我得到这个异常(exception): Exception in thread "main" java.lang.
使用以下代码时,我偶尔会遇到数组索引越界异常。任何线索?数组的大小始终约为 29-30。 logger.info("devicetripmessageinfo size :{}",deviceMess
我遇到了一个问题,但我没有任何线索来解决它! 问题很简单,我从 XSD 文件生成 JAXB 类。 (一个真正复杂的)。但是当编码发生时,我得到一个数组索引超出范围:[在此处插入随机负数] ja
嘿,stackoverflow 社区已经在这个程序上工作了几天,并且被这个错误困扰了一段时间,无法克服它。想知道是否有人可以提供有关正在发生的事情的见解。感谢大家的回复。 这是我运行程序时的输出: 2
我正在尝试制作一个简单的扫雷器,在 n*n 板上埋下 n*n/3 个地雷。地雷用*标记,空格用0标记。(它还不能作为游戏运行:我正在尝试制作扫雷的“答卷”)请注意,我还没有使用过任何有目的的方法。 我
我遇到了这篇文章中描述的类似问题。那里没有答案 - android intro screen error when add to 8 screen, but not error if 4 screen
我在第 66 行遇到错误 c[rowA][colB] = c[rowA][colB] + a[rowA][colA]*b[colA][colB];。我手动检查了索引,只是无法找出索引出错的地方。非常感
我在项目中使用 MessageDigest 计算 md5 签名,但在性能测试期间它抛出 ArrayIndexOutOfBoundsException。 我发现一些帖子表明这是因为 MessageDig
每当我运行代码时,它都会显示线程“main”中的异常 java.lang.ArrayIndexOutOfBoundsException: 0。我确保我的值(value)没有被超出,但它仍然这么说。你们
因此,我尝试按/、- 和空格分割字符串输入,并且在 dateConversion 方法中,我尝试调用字符串数组中的第三项(称为 terms)。如果我的数组只有 2 个元素,我会收到一个错误,我明白原因
这个问题已经有答案了: What causes a java.lang.ArrayIndexOutOfBoundsException and how do I prevent it? (25 个回答)
这个问题已经有答案了: What causes a java.lang.ArrayIndexOutOfBoundsException and how do I prevent it? (25 个回答)
我在 RegexReverseWords.reverseWords(第 23 行)和 RegexReverseWords.main(第 7 行)的 java.lang.ArrayIndexOutOfB
这个问题已经有答案了: How can I avoid ArrayIndexOutOfBoundsException or IndexOutOfBoundsException? [duplicate]
由于某种原因,我收到 ArrayIndexOutOfBoundsException 错误,我没有尝试访问数组的任何元素,我想做的就是设置大小,并通过引用传递 i.getRGB()。 /* * To
我不知道错误在哪里(插入表)。这是我的代码片段(插入开放寻址哈希表)。线性和双寻址都很好,但是这个(二次函数寻址)就出了问题 Exception in thread "main" java.lang.
我是一名优秀的程序员,十分优秀!