- Writable是Hadoop的序列化格式,Hadoop定义了这样一个Writable接口,一个类只要实现这个接口即可支持序列化
- 另外Writable有一个子接口是WritableComparable,WritableComparable既可以实现序列化,也可以对key进行比较。我们在这里可以通过自定义key并实现WritableComparable接口来实现我们的排序功能
练习
- 数据
a 1
a 9
b 3
a 7
b 8
b 10
a 5
- 要求:第一列按照字典顺序进行排序,如果第一列相同,第二列按照升序进行排序
代码
Map类
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class map extends Mapper<LongWritable, Text, data, NullWritable> {

    // Reused composite-key instance: avoids one allocation per input record.
    // Safe because context.write serializes the key immediately.
    private final data k = new data();

    /**
     * Parses one input line of the form "&lt;word&gt;&lt;whitespace&gt;&lt;number&gt;" and emits
     * the composite (word, num) key with a null value. The actual sorting
     * (word lexicographically, then num ascending) happens in the shuffle
     * phase via data.compareTo.
     *
     * @param key     byte offset of the line in the input split (unused)
     * @param value   one raw text line
     * @param context MapReduce context used to emit the key/value pair
     * @throws NumberFormatException if the second column is not an integer
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split on any run of whitespace so both tab- and space-separated
        // input (the sample data uses spaces) is accepted; trim() also
        // strips a trailing \r from files with Windows line endings.
        String[] fields = value.toString().trim().split("\\s+");
        k.set(fields[0], Integer.parseInt(fields[1]));
        context.write(k, NullWritable.get());
    }
}
reduce类
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class red extends Reducer<data, NullWritable, data, NullWritable> {

    /**
     * Identity reduce: writes each distinct composite key exactly once.
     *
     * By the time this runs, the shuffle has already sorted all keys via
     * data.compareTo, so simply emitting the key in arrival order produces
     * the required ordering (first column lexicographic, second ascending).
     * Note that duplicate (word, num) records collapse into a single output
     * line, because equal keys are grouped into one reduce call.
     *
     * @param key     the sorted composite (word, num) key
     * @param values  one NullWritable per original record (ignored)
     * @param context MapReduce context used to emit the result
     */
    @Override
    protected void reduce(data key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        context.write(key, NullWritable.get());
    }
}
自定义数据类
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Composite MapReduce key holding (word, num).
 *
 * Sort order: word lexicographically, then num ascending — this drives the
 * shuffle-phase sort required by the exercise. The class also overrides
 * equals/hashCode consistently with compareTo: with the default
 * HashPartitioner, a key class relying on Object's identity hashCode would
 * scatter equal keys across different reducers, breaking grouping.
 */
public class data implements WritableComparable<data> {

    private String word; // first column: sorted lexicographically
    private int num;     // second column: ascending tie-breaker

    /** Sets both fields at once; used by the mapper to reuse one instance. */
    public void set(String word, int num) {
        this.word = word;
        this.num = num;
    }

    public String getWord() {
        return word;
    }

    public void setWord(String word) {
        this.word = word;
    }

    public int getNum() {
        return num;
    }

    public void setNum(int num) {
        this.num = num;
    }

    /**
     * Compares by word first, then by num.
     * Uses Integer.compare instead of subtraction: (this.num - o.num)
     * overflows for large-magnitude operands and can return the wrong sign.
     */
    @Override
    public int compareTo(data o) {
        int byWord = this.word.compareTo(o.word);
        if (byWord != 0) {
            return byWord;
        }
        return Integer.compare(this.num, o.num);
    }

    /** Serializes the fields; order must match readFields exactly. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(word);
        dataOutput.writeInt(num);
    }

    /** Deserializes the fields in the same order write emitted them. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        word = dataInput.readUTF();
        num = dataInput.readInt();
    }

    /** Equality consistent with compareTo (required for correct grouping). */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof data)) {
            return false;
        }
        data other = (data) obj;
        return num == other.num && Objects.equals(word, other.word);
    }

    /** Deterministic hash so HashPartitioner routes equal keys together. */
    @Override
    public int hashCode() {
        return Objects.hash(word, num);
    }

    @Override
    public String toString() {
        return word + "\t" + num;
    }
}
Driver类
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.File;
import java.util.Objects;
/**
 * Driver for the sort job: clears the local output directory (Hadoop aborts
 * if it already exists), then configures and submits the MapReduce job.
 */
public class dri {

    public static void main(String[] args) throws Exception {
        File outputDir = new File("D:\\WC\\output");
        // Hadoop refuses to start when the output path already exists,
        // so remove it first. driver() runs in either case — the original
        // duplicated the call in both branches.
        if (outputDir.exists()) {
            delFile(outputDir);
        }
        driver();
    }

    /**
     * Recursively deletes a file or directory tree.
     * Children are deleted before the directory itself, since
     * File.delete() only removes empty directories.
     *
     * @param file file or directory to remove
     */
    public static void delFile(File file) {
        File[] children = file.listFiles();
        if (children != null) {
            for (File child : children) {
                delFile(child);
            }
        }
        // Don't silently ignore a failed delete — the job would then
        // abort later with a less helpful "output exists" error.
        if (!file.delete()) {
            System.err.println("Warning: could not delete " + file);
        }
    }

    /**
     * Configures and runs the job: map emits (data, NullWritable), the
     * shuffle sorts via data.compareTo, and reduce writes each key once.
     * Exits the JVM with 0 on success, 1 on failure.
     */
    public static void driver() throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(dri.class);
        job.setMapperClass(map.class);
        job.setReducerClass(red.class);
        // Map output and final output share the same key/value types.
        job.setMapOutputKeyClass(data.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(data.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, "D:\\WC\\input");
        FileOutputFormat.setOutputPath(job, new Path("D:\\WC\\output"));
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}