Bean对象

package com.zyd.order;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;



/**
 * Composite key made of an order id and a price.
 *
 * <p>Instances are serialized with {@link #write(DataOutput)} and restored with
 * {@link #readFields(DataInput)}; both methods handle the fields in the same
 * order (order id first, then price).
 *
 * <p>Natural ordering: ascending by order id, then descending by price, so for
 * a given order id the most expensive entry sorts first.
 */
public class OrderBean implements WritableComparable<OrderBean> {

    private String orderId; // order id
    private Double price;   // price

    /** No-argument constructor; fields are filled in later by setters or readFields. */
    public OrderBean() {
    }

    /**
     * Creates a bean with both fields set.
     *
     * @param orderId the order id
     * @param price   the price
     */
    public OrderBean(String orderId, Double price) {
        super();
        this.orderId = orderId;
        this.price = price;
    }

    public String getOrderId() {
        return orderId;
    }

    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    public Double getPrice() {
        return price;
    }

    public void setPrice(Double price) {
        this.price = price;
    }

    /** Returns the order id and the price separated by a tab character. */
    @Override
    public String toString() {
        return orderId + "\t" + price;
    }

    /**
     * Deserialization: reads the fields in the same order {@link #write} emits them.
     *
     * @param in source to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.orderId = in.readUTF();
        this.price = in.readDouble();
    }

    /**
     * Serialization: writes the order id followed by the price.
     *
     * @param out sink to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // String fields are written with the UTF method
        out.writeUTF(orderId);
        out.writeDouble(price);
    }

    /**
     * Two-level comparison: order id ascending, then price descending.
     *
     * <p>Returns 0 when both the order id and the price are equal. The previous
     * implementation returned 1 in that case for either argument order, which
     * made the ordering inconsistent ({@code a.compareTo(b)} and
     * {@code b.compareTo(a)} were both positive).
     *
     * @param o the bean to compare against
     * @return negative, zero or positive as this bean sorts before, equal to or
     *         after {@code o}
     */
    @Override
    public int compareTo(OrderBean o) {
        // 1. Sort by order id
        int byOrderId = this.orderId.compareTo(o.getOrderId());
        if (byOrderId != 0) {
            return byOrderId;
        }
        // 2. Same id: sort by price, highest first (arguments swapped for descending order)
        return Double.compare(o.getPrice(), this.price);
    }

}

  1. 声明属性
  2. 生成getset方法,
  3. 重写toString,字段之间用\t分隔
  4. 生成带参和无参构造器
  5. 写序列化和反序列化,并保持一致
  6. 重写compareTo方法
  7. compareTo的约定:将此对象与指定的对象进行比较。当该对象小于、等于或大于指定对象时,分别返回负整数、零或正整数。

Mapper类

package com.zyd.order;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Parses tab-separated order lines and emits an {@link OrderBean} key with a
 * {@link NullWritable} value for each well-formed line.
 *
 * <p>Expected line layout (example from the input data):
 * {@code Order_0000002 <TAB> Pdt_03 <TAB> 522.8} — field 0 is the order id and
 * field 2 is the price.
 */
public class OrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable>{
    // Single bean instance reused for every record handled by this mapper
    OrderBean bean = new OrderBean();

    /**
     * Converts one input line into an {@link OrderBean} key.
     *
     * <p>Lines with fewer than three tab-separated fields, or whose third field
     * is not a valid number, are skipped and counted under the
     * {@code OrderMapper / MALFORMED_RECORDS} counter instead of failing the task.
     *
     * @param key     input key (not used)
     * @param value   one line of input text
     * @param context used to emit the output pair and to update counters
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. Read the line
        String line = value.toString();

        // 2. Split into fields
        String[] fields = line.split("\t");
        if (fields.length < 3) {
            // Blank or truncated line: nothing to parse
            context.getCounter("OrderMapper", "MALFORMED_RECORDS").increment(1);
            return;
        }

        double price;
        try {
            price = Double.parseDouble(fields[2]);
        } catch (NumberFormatException e) {
            // Price column is not numeric: skip the record but keep it visible in counters
            context.getCounter("OrderMapper", "MALFORMED_RECORDS").increment(1);
            return;
        }

        // 3. Populate the bean
        bean.setOrderId(fields[0]);
        bean.setPrice(price);

        // 4. Emit
        context.write(bean, NullWritable.get());
    }
}

Partitioner分区

package com.zyd.order;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;
//输入是Mapper的输出
/**
 * Chooses a partition from the hash code of the key's order id.
 * The key/value types here are the mapper's output types.
 */
public class OrderPatitioner extends Partitioner<OrderBean, NullWritable> {

    /**
     * @param key            the key whose order id is hashed
     * @param value          not used
     * @param numReduceTasks divisor applied to the masked hash
     * @return the masked order-id hash modulo {@code numReduceTasks}
     */
    @Override
    public int getPartition(OrderBean key, NullWritable value, int numReduceTasks) {
        final int idHash = key.getOrderId().hashCode();
        // Clear the sign bit so the remainder below cannot be negative
        final int nonNegativeHash = idHash & Integer.MAX_VALUE;
        return nonNegativeHash % numReduceTasks;
    }

}

Reducer类

package com.zyd.order;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Writes the key it receives once per reduce call, paired with a
 * {@link NullWritable} value; the incoming values are not read.
 */
public class OrderReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {

    /**
     * @param bean    the key passed to this reduce call
     * @param values  values for the key (ignored)
     * @param context used to emit the output pair
     */
    @Override
    protected void reduce(OrderBean bean, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // Emit the key with an empty value
        final NullWritable emptyValue = NullWritable.get();
        context.write(bean, emptyValue);
    }
}

驱动类(注:下方代码与上面的Reducer类完全相同,原文此处缺少真正的驱动类代码)

package com.zyd.order;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Writes the key it receives once per reduce call, paired with a
 * {@link NullWritable} value; the incoming values are not read.
 */
public class OrderReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {

    /**
     * @param bean    the key passed to this reduce call
     * @param values  values for the key (ignored)
     * @param context used to emit the output pair
     */
    @Override
    protected void reduce(OrderBean bean, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // Emit the key with an empty value
        final NullWritable emptyValue = NullWritable.get();
        context.write(bean, emptyValue);
    }
}

bug分析:

Caused by: java.io.IOException: Illegal partition for Order_0000001 222.8 (1689378336)

原因:getPartition返回的分区号不在[0, reduceTask个数)的范围内(例如没有对numReduceTasks取模),即partition的个数和reduceTask的个数不一致导致的

但是输出的数据仍然是有问题的:虽然每一个分区中每个订单的第一行是该订单中最贵的,但其他的数据是冗余的,因此引入GroupingComparator(分组比较器)的概念

分析

bean对象将订单id和价格封装在一起作为key,但是由于价格不同,reduceTask会认为它们是不同的key;使用GroupingComparator进行分组,只比较订单id,欺骗reduceTask认为它们是同一个key

**优化:**增加GroupingComparator的方法

package com.zyd.order;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Comparator that looks only at the order id of two {@link OrderBean} keys,
 * so keys with the same order id compare as equal regardless of price.
 */
public class OrderGroupingCompartor extends WritableComparator {

    /**
     * No-argument constructor that tells the base class which key type is
     * being compared.
     */
    public OrderGroupingCompartor() {
        super(OrderBean.class, true);
    }

    /**
     * Compares two keys by order id only.
     *
     * @param a first key; must be an {@link OrderBean}
     * @param b second key; must be an {@link OrderBean}
     * @return the result of comparing the two order id strings; 0 means the
     *         keys belong to the same group
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final OrderBean left = (OrderBean) a;
        final OrderBean right = (OrderBean) b;
        final String leftId = left.getOrderId();
        return leftId.compareTo(right.getOrderId());
    }
}

在主驱动方法中,设置分区的代码之前增加:

// Register the grouping comparator on the job
job.setGroupingComparatorClass(OrderGroupingCompartor.class);