Hi All,
I am a beginner of Hadoop. I modified the Inverted Index Code in Yahoo's
Tutorial
(
http://developer.yahoo.com/hadoop/tutorial/module4.html#solution), but
I always get errors of "java.io.IOException: Type mismatch in key from
map: expected org.apache.hadoop.io.Text, recieved
org.apache.hadoop.io.LongWritable". Could some people tell me what is
wrong in my code? Thanks a million!
Zhiqiang
----------------------------code
starts--------------------------------------
import java.io.IOException;
import java.util.Iterator;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.input.FileSplit;import org.apache.hadoop.mapred.OutputCollector;import org.apache.hadoop.mapred.Reporter;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;public class YahooIndex { public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
private final static Text word = new Text(); private final static Text location = new Text(); public void map(LongWritable key, Text val, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { FileSplit fileSplit = (FileSplit)reporter.getInputSplit(); String fileName = fileSplit.getPath().getName(); location.set(fileName); String line = val.toString(); StringTokenizer itr = new StringTokenizer(line.toLowerCase()); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); output.collect(word, location); } } } public static class MyReducer extends Reducer<Text, Text, Text, Text> { public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { boolean first = true; StringBuilder toReturn = new StringBuilder(); while (values.hasNext()){ if (!first) toReturn.append(", "); first=false; toReturn.append(values.next().toString()); } output.collect(key, new Text(toReturn.toString())); } } public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration(); Job job = new Job(conf, "Example Hadoop 0.20.1 WordCount"); job.setJarByClass(YahooIndex.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path("input")); FileOutputFormat.setOutputPath(job, new Path("output")); System.exit(job.waitForCompletion(true) ? 0 : 1); }} -----------------------------------code ends--------------------------------------------------------------------
Tags: LongWritable, missmatch, type
Share
Facebook
-
▶ Reply to This