Bean对象
package com.zyd.order;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
 * Composite key that pairs an order id with a price.
 *
 * <p>Implements {@code WritableComparable} so it can be serialized with
 * {@link #write(DataOutput)} / {@link #readFields(DataInput)} and ordered with
 * {@link #compareTo(OrderBean)}: ascending by order id, then descending by price.
 */
public class OrderBean implements WritableComparable<OrderBean> {

    private String orderId; // order id
    private Double price;   // price

    /** Creates an empty bean; fields are filled in later via setters or {@link #readFields}. */
    public OrderBean() {
    }

    /**
     * Creates a bean with both fields set.
     *
     * @param orderId the order id
     * @param price   the price
     */
    public OrderBean(String orderId, Double price) {
        super();
        this.orderId = orderId;
        this.price = price;
    }

    /** @return the order id */
    public String getOrderId() {
        return orderId;
    }

    /** @param orderId the order id to set */
    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    /** @return the price */
    public Double getPrice() {
        return price;
    }

    /** @param price the price to set */
    public void setPrice(Double price) {
        this.price = price;
    }

    /** @return the order id and the price separated by a tab character */
    @Override
    public String toString() {
        return orderId + "\t" + price;
    }

    /**
     * Deserialization: reads the fields in the same order {@link #write} emits them.
     *
     * @param in the source to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.orderId = in.readUTF();
        this.price = in.readDouble();
    }

    /**
     * Serialization: writes the order id (as UTF) followed by the price.
     *
     * @param out the sink to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // Strings are written with the UTF variant of the DataOutput methods.
        out.writeUTF(orderId);
        out.writeDouble(price);
    }

    /**
     * Two-level ordering: first by order id (ascending), then, for equal ids,
     * by price (descending, so the highest price sorts first).
     *
     * @param o the bean to compare against
     * @return a negative integer, zero, or a positive integer as this bean sorts
     *         before, equal to, or after {@code o}
     */
    @Override
    public int compareTo(OrderBean o) {
        // 1. Order by id.
        int comResult = this.orderId.compareTo(o.getOrderId());
        // 2. Same id: order by price, highest first.
        if (comResult == 0) {
            // Arguments are swapped to get descending order. Unlike "a > b ? -1 : 1",
            // Double.compare returns 0 for equal prices, which keeps compareTo
            // antisymmetric as the Comparable contract requires.
            comResult = Double.compare(o.getPrice(), this.price);
        }
        return comResult;
    }
}
- 声明属性
- 生成get/set方法
- 重写toString,字段之间用\t分隔
- 生成带参和无参构造器
- 写序列化和反序列化,并保持一致
- 重写compareTo方法
- 将此对象与指定的对象进行比较。当该对象小于、等于或大于指定对象时,分别返回负整数、零或正整数。
Mapper类
package com.zyd.order;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that turns each tab-separated input line into an {@code OrderBean} key
 * (order id + price) with a {@code NullWritable} value.
 *
 * <p>Expected line layout, e.g. {@code Order_0000002<TAB>Pdt_03<TAB>522.8}:
 * field 0 is the order id and field 2 is the price.
 */
public class OrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {

    /** Minimum number of tab-separated fields a line needs (the price is field index 2). */
    private static final int MIN_FIELDS = 3;

    // Single instance reused for every record; its fields are overwritten on each call.
    OrderBean bean = new OrderBean();

    /**
     * Parses one input line and emits the resulting bean as the output key.
     *
     * <p>Lines with fewer than three tab-separated fields (including blank lines)
     * are skipped and counted under the {@code OrderMapper / MALFORMED_LINES}
     * counter instead of failing the task with an
     * {@code ArrayIndexOutOfBoundsException}. A non-numeric price still raises
     * {@code NumberFormatException}.
     *
     * @param key     the input key (not used)
     * @param value   the input line
     * @param context the task context used to emit output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. Read the line.
        String line = value.toString();

        // 2. Split it into fields.
        String[] fields = line.split("\t");
        if (fields.length < MIN_FIELDS) {
            // Not enough columns to hold a price: record it and move on.
            context.getCounter("OrderMapper", "MALFORMED_LINES").increment(1);
            return;
        }

        // 3. Populate the bean.
        bean.setOrderId(fields[0]);
        bean.setPrice(Double.parseDouble(fields[2]));

        // 4. Emit.
        context.write(bean, NullWritable.get());
    }
}
Partitioner分区
package com.zyd.order;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;
//输入是Mapper的输出
/**
 * Partitioner for the mapper output ({@code OrderBean} key, {@code NullWritable} value)
 * that picks the partition from the key's order id only.
 */
public class OrderPatitioner extends Partitioner<OrderBean, NullWritable> {

    /**
     * Computes the partition from the hash code of the key's order id.
     *
     * @param key            the map output key
     * @param value          the map output value (not used)
     * @param numReduceTasks the divisor applied to the hash
     * @return the masked hash of the order id modulo {@code numReduceTasks}
     */
    @Override
    public int getPartition(OrderBean key, NullWritable value, int numReduceTasks) {
        // Clear the sign bit so the remainder below can never be negative.
        final int nonNegativeHash = key.getOrderId().hashCode() & Integer.MAX_VALUE;
        return nonNegativeHash % numReduceTasks;
    }
}
Reducer类
package com.zyd.order;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that writes the key it is handed exactly once per {@code reduce} call
 * and ignores the values.
 */
public class OrderReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {

    /**
     * Emits the incoming key paired with a {@code NullWritable} value.
     *
     * @param bean    the key for this reduce call
     * @param values  the values for this key (not read)
     * @param context the task context used to emit output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(OrderBean bean, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        final NullWritable placeholder = NullWritable.get();
        // Write out the key; there is no payload to attach.
        context.write(bean, placeholder);
    }
}
驱动类(注:下方贴出的代码与上文的Reducer类完全相同,此处本应是驱动类的代码,原文疑似贴错)
package com.zyd.order;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
// NOTE(review): this class is a second copy of OrderReducer that appears under the
// driver-class heading; the driver code itself is not present in this section.
/**
 * Reducer that forwards each key it receives as a single output record,
 * without reading the associated values.
 */
public class OrderReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {

    /**
     * Writes the key once, with {@code NullWritable} as the value.
     *
     * @param bean    the key for this reduce call
     * @param values  the values for this key (not read)
     * @param context the task context used to emit output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(
            OrderBean bean,
            Iterable<NullWritable> values,
            Context context) throws IOException, InterruptedException {
        // Emit the key only.
        final NullWritable none = NullWritable.get();
        context.write(bean, none);
    }
}
bug分析:
Caused by: java.io.IOException: Illegal partition for Order_0000001 222.8 (1689378336)
原因:partition的个数和reduceTask的个数不一致。
但是修正之后,输出的数据仍然有问题:虽然每个分区中每个订单的第一行是该订单中最贵的一条记录,但同一订单的其他记录也被冗余地输出了。为此引入GroupingComparator(分组比较器)的概念。
分析
bean对象封装了订单id和价格,由于价格不同,reduceTask会把同一订单的记录当作不同的key。使用GroupingComparator按订单id进行分组,可以"欺骗"reduceTask,让它认为这些记录是同一个key。
**优化:** 增加GroupingComparator类
package com.zyd.order;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Grouping comparator that treats two {@code OrderBean} keys as equal whenever
 * their order ids are equal; the price is not consulted.
 */
public class OrderGroupingCompartor extends WritableComparator {

    /**
     * No-arg constructor that tells the base class which key type is compared.
     */
    public OrderGroupingCompartor() {
        // NOTE(review): the second argument is presumably WritableComparator's
        // "createInstances" flag — confirm against the Hadoop API docs.
        super(OrderBean.class, true);
    }

    /**
     * Compares two keys by order id only.
     *
     * @param a the first key; cast to {@code OrderBean}
     * @param b the second key; cast to {@code OrderBean}
     * @return the result of comparing the two order id strings
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final OrderBean left = (OrderBean) a;
        final OrderBean right = (OrderBean) b;
        // Same order id means same group.
        final String leftId = left.getOrderId();
        return leftId.compareTo(right.getOrderId());
    }
}
在主驱动方法中,设置分区(Partitioner)的代码之前增加:
// Associate the grouping comparator with the job
job.setGroupingComparatorClass(OrderGroupingCompartor.class);