This follows a blog post: https://www.liangzl.com/get-article-detail-131008.html
The idea is to define a single Writable bean that can carry the fields of both tables,
load all input files at once via FileInputFormat.setInputPaths(job, input),
and tell which table a record belongs to by the name of the file it came from.
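
For concreteness, the input could look like the following (hypothetical sample data, not from the original post; the field layout is what the mapper below assumes: order files hold orderId,userId and user files hold userId,userName,age,friend):

order.txt
order001,u001
order002,u001
order003,u002

user.txt
u001,Alice,25,Bob
u002,Carol,30,Dave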
JoinBean
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// A Writable bean that carries the fields of both tables; tableName marks
// which table a record came from ("order" or "user").
public class JoinBean implements Writable {
    private String orderId;
    private String userId;
    private String userName;
    private int userAge;
    private String userFriend;
    private String tableName;

    public void set(String orderId, String userId, String userName, int userAge,
                    String userFriend, String tableName) {
        this.orderId = orderId;
        this.userId = userId;
        this.userName = userName;
        this.userAge = userAge;
        this.userFriend = userFriend;
        this.tableName = tableName;
    }

    public String getOrderId() { return orderId; }
    public void setOrderId(String orderId) { this.orderId = orderId; }

    public String getUserId() { return userId; }
    public void setUserId(String userId) { this.userId = userId; }

    public String getUserName() { return userName; }
    public void setUserName(String userName) { this.userName = userName; }

    public int getUserAge() { return userAge; }
    public void setUserAge(int userAge) { this.userAge = userAge; }

    public String getUserFriend() { return userFriend; }
    public void setUserFriend(String userFriend) { this.userFriend = userFriend; }

    public String getTableName() { return tableName; }
    public void setTableName(String tableName) { this.tableName = tableName; }

    @Override
    public String toString() {
        return this.orderId + "," + this.userId + "," + this.userAge
                + "," + this.userName + "," + this.userFriend;
    }

    // Serialization used by Hadoop between map and reduce;
    // the field order here must match readFields() exactly.
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.orderId);
        dataOutput.writeUTF(this.userId);
        dataOutput.writeUTF(this.userName);
        dataOutput.writeInt(this.userAge);
        dataOutput.writeUTF(this.userFriend);
        dataOutput.writeUTF(this.tableName);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.orderId = dataInput.readUTF();
        this.userId = dataInput.readUTF();
        this.userName = dataInput.readUTF();
        this.userAge = dataInput.readInt();
        this.userFriend = dataInput.readUTF();
        this.tableName = dataInput.readUTF();
    }
}
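
Because Hadoop serializes the bean between the map and reduce phases, write() and readFields() must stay in sync. A quick local sanity check (a minimal sketch, not part of the original post; the class name JoinBeanRoundTrip is made up) is a round trip through a byte buffer:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class JoinBeanRoundTrip {
    public static void main(String[] args) throws Exception {
        JoinBean in = new JoinBean();
        in.set("order001", "u001", "NULL", -1, "NULL", "order");

        // Serialize with write(), as Hadoop would when shuffling map output.
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        in.write(new DataOutputStream(buf));

        // Deserialize into a fresh bean with readFields().
        JoinBean out = new JoinBean();
        out.readFields(new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));

        System.out.println(out); // expect: order001,u001,-1,NULL,NULL
    }
}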
ReduceSideJoin
import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.ArrayList;
public class ReduceSideJoin {
    // Hadoop creates one Mapper instance per input split, calls setup() once,
    // then calls map() for every record in the split.
    public static class ReduceSideJoinMapper extends Mapper<LongWritable, Text, Text, JoinBean> {

        String fileName = null;
        JoinBean bean = new JoinBean();
        Text k = new Text();

        /**
         * setup() runs exactly once, before any map() call; we use it to
         * record which input file this split belongs to.
         * @param context
         * @throws IOException
         * @throws InterruptedException
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            FileSplit inputSplit = (FileSplit) context.getInputSplit();
            fileName = inputSplit.getPath().getName();
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            if (fileName.startsWith("order")) { // tell the tables apart by file name
                bean.set(fields[0], fields[1], "NULL", -1, "NULL", "order");
            } else {
                bean.set("NULL", fields[0], fields[1], Integer.parseInt(fields[2]), fields[3], "user");
            }
            // Key by userId so matching order and user records meet in one reduce call.
            k.set(bean.getUserId());
            context.write(k, bean);
        }
    }
    public static class ReducerSideJoinReduce extends Reducer<Text, JoinBean, JoinBean, NullWritable> {

        @Override
        protected void reduce(Text key, Iterable<JoinBean> beans, Context context) throws IOException, InterruptedException {
            ArrayList<JoinBean> orderList = new ArrayList<JoinBean>();
            JoinBean userBean = null;
            try {
                // Separate the two kinds of records. Hadoop reuses the bean object
                // across iterations, so each record must be copied out.
                for (JoinBean bean : beans) {
                    if ("order".equals(bean.getTableName())) {
                        JoinBean newBean = new JoinBean();
                        BeanUtils.copyProperties(newBean, bean);
                        orderList.add(newBean);
                    } else {
                        userBean = new JoinBean();
                        BeanUtils.copyProperties(userBean, bean);
                    }
                }
                // Join: copy the user fields into every order record with this userId.
                // Guard against orders that have no matching user record.
                if (userBean != null) {
                    for (JoinBean bean : orderList) {
                        bean.setUserAge(userBean.getUserAge());
                        bean.setUserFriend(userBean.getUserFriend());
                        bean.setUserName(userBean.getUserName());
                        context.write(bean, NullWritable.get());
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Locate the jar that contains this class.
        job.setJarByClass(ReduceSideJoin.class);
        // Mapper and Reducer implementations for this job.
        job.setMapperClass(ReduceSideJoinMapper.class);
        job.setReducerClass(ReducerSideJoinReduce.class);
        // Key/value types emitted by the Mapper.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(JoinBean.class);
        // Key/value types emitted by the Reducer.
        job.setOutputKeyClass(JoinBean.class);
        job.setOutputValueClass(NullWritable.class);

        Path input = new Path("/home/tqc/Desktop/mrjoin/input");
        Path output = new Path("/home/tqc/Desktop/mrjoin/output");
        // Delete the output directory if it exists, or the job will refuse to start.
        FileSystem fs = output.getFileSystem(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : -1);
    }
}
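
Since the input and output paths are hard-coded local paths, the job presumably runs under the local job runner. Assuming the classes are packaged into a jar (the jar name below is made up), it can be launched with the standard hadoop jar command:

hadoop jar mrjoin.jar ReduceSideJoin

With the hypothetical sample input shown earlier, the output file part-r-00000 would contain one joined line per order, in the format produced by JoinBean.toString():

order001,u001,25,Alice,Bob
order002,u001,25,Alice,Bob
order003,u002,30,Carol,Dave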