7.3.2 实现自己的Hadoop数据类型

实现自定义的Hadoop数据类型具有非常重要的意义。虽然Hadoop已经定义了很多有用的数据类型,但在实际应用中,我们总是需要定义自己的数据类型以满足程序的需要。

我们定义一个简单的整数对<LongWritable, LongWritable>,这个类可以用来记录文章中单词出现的位置:第一个LongWritable代表单词所在的行号,第二个LongWritable代表它是该行的第几个单词。NumPair的定义如下所示:


package cn.edn.ruc.cloudcomputing.book.chapter07;

import java.io.*;

import org.apache.hadoop.io.*;

/**
 * A pair of {@link LongWritable} values describing where a word occurs in a
 * text: the line it is on and its position within that line.
 *
 * <p>Pairs are ordered by line first and by location second, and
 * {@link #equals(Object)}, {@link #hashCode()} and {@link #compareTo(NumPair)}
 * are all based on the wrapped numeric values, so they agree with each other.
 */
public class NumPair implements WritableComparable<NumPair> {

    /** Line component of the pair. */
    private LongWritable line;

    /** Location (word index within the line) component of the pair. */
    private LongWritable location;

    /** Creates a pair whose line and location are both 0. */
    public NumPair() {
        set(new LongWritable(0), new LongWritable(0));
    }

    /**
     * Creates a pair backed by the given writables.
     *
     * @param first  the line component
     * @param second the location component
     */
    public NumPair(LongWritable first, LongWritable second) {
        set(first, second);
    }

    /**
     * Creates a pair from plain integers; each value is wrapped in a new
     * {@link LongWritable}.
     *
     * @param first  the line component
     * @param second the location component
     */
    public NumPair(int first, int second) {
        set(new LongWritable(first), new LongWritable(second));
    }

    /**
     * Replaces both components of this pair.
     *
     * <p>The arguments are stored by reference, not copied, and
     * {@link #readFields(DataInput)} later deserializes into these same
     * objects.
     *
     * @param first  the new line component
     * @param second the new location component
     */
    public void set(LongWritable first, LongWritable second) {
        this.line = first;
        this.location = second;
    }

    /**
     * @return the line component (the internal object, not a copy)
     */
    public LongWritable getLine() {
        return line;
    }

    /**
     * @return the location component (the internal object, not a copy)
     */
    public LongWritable getLocation() {
        return location;
    }

    /**
     * Reads the line and then the location from {@code in}, in the same order
     * in which {@link #write(DataOutput)} emits them.
     *
     * @param in the input to deserialize from
     * @throws IOException if reading either component fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        line.readFields(in);
        location.readFields(in);
    }

    /**
     * Writes the line and then the location to {@code out}.
     *
     * @param out the output to serialize to
     * @throws IOException if writing either component fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        line.write(out);
        location.write(out);
    }

    /**
     * Compares this pair with another object by value.
     *
     * @param o the object to compare with; may be {@code null}
     * @return {@code true} if {@code o} is a {@code NumPair} whose line and
     *         location are equal to this pair's, {@code false} otherwise
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof NumPair)) {
            return false;
        }
        NumPair other = (NumPair) o;
        // Compare the wrapped values, not the references: two pairs that were
        // built or deserialized separately hold distinct LongWritable objects.
        return line.equals(other.line) && location.equals(other.location);
    }

    /**
     * @return a hash derived from both components, consistent with
     *         {@link #equals(Object)}
     */
    @Override
    public int hashCode() {
        return line.hashCode() * 13 + location.hashCode();
    }

    /**
     * Orders pairs by line, breaking ties by location.
     *
     * @param o the pair to compare with
     * @return a negative value, zero, or a positive value as this pair is
     *         less than, equal to, or greater than {@code o}
     */
    @Override
    public int compareTo(NumPair o) {
        // The sign must flip when the operands are swapped, so delegate to the
        // components' own ordering instead of returning a fixed value.
        int byLine = line.compareTo(o.line);
        if (byLine != 0) {
            return byLine;
        }
        return location.compareTo(o.location);
    }

}