7.3.2 实现自己的Hadoop数据类型
实现自定义的Hadoop数据类型具有非常重要的意义。虽然Hadoop已经定义了很多有用的数据类型,但在实际应用中,我们总是需要定义自己的数据类型以满足程序的需要。
我们定义一个简单的整数对<LongWritable, LongWritable>,这个类可以用来记录文章中单词出现的位置:第一个LongWritable代表单词所在的行号,第二个LongWritable代表它是该行的第几个单词。NumPair的定义如下所示:
package cn.edn.ruc.cloudcomputing.book.chapter07;
import java.io.*;
import org.apache.hadoop.io.*;
/**
 * A pair of {@link LongWritable} values recording where a word occurs in a
 * text: the line it is on and its position within that line.
 *
 * <p>Equality, hashing and ordering are all based on the numeric values of
 * the two components, so two pairs holding the same numbers are equal, hash
 * alike and compare as 0. Ordering is by line first, then by location.
 *
 * <p>Both components are expected to be non-null; serialization, hashing and
 * comparison dereference them directly.
 */
public class NumPair implements WritableComparable<NumPair> {
    /** Line on which the word occurs. */
    private LongWritable line;
    /** Position of the word within its line. */
    private LongWritable location;

    /** Creates the pair (0, 0); also gives {@link #readFields} objects to fill. */
    public NumPair() {
        set(new LongWritable(0), new LongWritable(0));
    }

    /**
     * Creates a pair that wraps the given writables (no copy is made).
     *
     * @param first  the line component
     * @param second the location component
     */
    public NumPair(LongWritable first, LongWritable second) {
        set(first, second);
    }

    /**
     * Creates a pair from plain integers.
     *
     * @param first  the line component
     * @param second the location component
     */
    public NumPair(int first, int second) {
        set(new LongWritable(first), new LongWritable(second));
    }

    /**
     * Replaces both components with the given writables (no copy is made).
     *
     * @param first  the new line component
     * @param second the new location component
     */
    public void set(LongWritable first, LongWritable second) {
        this.line = first;
        this.location = second;
    }

    /** @return the line component */
    public LongWritable getLine() {
        return line;
    }

    /** @return the location component */
    public LongWritable getLocation() {
        return location;
    }

    /**
     * Reads the line and then the location from {@code in}, in the same order
     * that {@link #write} emits them.
     *
     * @param in the input to deserialize from
     * @throws IOException if reading either component fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        line.readFields(in);
        location.readFields(in);
    }

    /**
     * Writes the line and then the location to {@code out}.
     *
     * @param out the output to serialize to
     * @throws IOException if writing either component fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        line.write(out);
        location.write(out);
    }

    /**
     * Compares by value. This overrides {@link Object#equals(Object)} (rather
     * than overloading it with a {@code NumPair} parameter) so that collections
     * and framework code which call {@code equals(Object)} see it.
     *
     * @param o the object to compare with
     * @return {@code true} if {@code o} is a {@code NumPair} whose line and
     *         location are equal to this pair's
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof NumPair)) {
            return false;
        }
        NumPair other = (NumPair) o;
        // LongWritable.equals compares the wrapped values; == would only
        // compare object identity.
        return line.equals(other.line) && location.equals(other.location);
    }

    /** @return a hash derived from both components, consistent with {@link #equals} */
    @Override
    public int hashCode() {
        return line.hashCode() * 13 + location.hashCode();
    }

    /**
     * Orders pairs by line, breaking ties by location.
     *
     * @param o the pair to compare with
     * @return a negative value, zero, or a positive value as this pair is
     *         less than, equal to, or greater than {@code o}
     */
    @Override
    public int compareTo(NumPair o) {
        int byLine = line.compareTo(o.line);
        if (byLine != 0) {
            return byLine;
        }
        return location.compareTo(o.location);
    }
}