Hadoop Professionals

A Community for Hadoop Users

Hi All,


I am new to Hadoop. I modified the inverted index code in Yahoo's tutorial
(http://developer.yahoo.com/hadoop/tutorial/module4.html#solution), but
I always get the error "java.io.IOException: Type mismatch in key from
map: expected org.apache.hadoop.io.Text, recieved
org.apache.hadoop.io.LongWritable". Could someone tell me what is
wrong with my code? Thanks a million!


Zhiqiang

----------------------------code starts--------------------------------------

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class YahooIndex {

  public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

    private final static Text word = new Text();
    private final static Text location = new Text();

    public void map(LongWritable key, Text val,
        OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      FileSplit fileSplit = (FileSplit)reporter.getInputSplit();
      String fileName = fileSplit.getPath().getName();
      location.set(fileName);

      String line = val.toString();
      StringTokenizer itr = new StringTokenizer(line.toLowerCase());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        output.collect(word, location);
      }
    }
  }



  public static class MyReducer extends Reducer<Text, Text, Text, Text> {

    public void reduce(Text key, Iterator<Text> values,
        OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      boolean first = true;
      StringBuilder toReturn = new StringBuilder();
      while (values.hasNext()){
        if (!first)
          toReturn.append(", ");
        first=false;
        toReturn.append(values.next().toString());
      }

      output.collect(key, new Text(toReturn.toString()));
    }
  }


  public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
          Configuration conf = new Configuration();
        Job job = new Job(conf, "Example Hadoop 0.20.1 WordCount");
        job.setJarByClass(YahooIndex.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path("input"));
        FileOutputFormat.setOutputPath(job, new Path("output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
-----------------------------------code ends--------------------------------------------------------------------

Tags: LongWritable, missmatch, type

Views: 536

Reply to This

Replies to This Discussion

Unless you explicitly set it, you will get TextInputFormat as your input format, and its keys are LongWritable.

If you want Text keys and Text values, call

job.setInputFormat(KeyValueTextInputFormat.class) in your main.

Or change the key type that your mapper takes to LongWritable, and expect the key to be the position (byte offset) of the value's line within the input file.
I figured out the problem already. Thanks

Reply to Discussion

RSS




Groups

© 2012   Created by Jason Venner.

Badges  |  Report an Issue  |  Terms of Service