MapReduce--->Map端的join

简介
  • 适用于小表关联大表的场景:在Map阶段将小表缓存到内存中,从而在Map端完成表的join操作
  • MapJoin没有Reduce阶段
代码

Map

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

/**
 * Mapper for a map-side join.
 *
 * <p>{@link #setup} loads the first registered cache file into an in-memory
 * lookup table (first space-separated field -> second field). {@link #map}
 * then splits each input record on a single space, replaces the second field
 * with the value looked up in that table, and emits
 * {@code field0 \t lookedUpValue \t field2} as the key with a
 * {@link NullWritable} value.
 */
public class map extends Mapper<LongWritable, Text,Text, NullWritable> {
    /** Counter group used to report skipped lines. */
    private static final String COUNTER_GROUP = "MapJoin";

    /** Lookup table built from the cache file: first field of a line -> second field. */
    private final Map<String, String> lookup = new HashMap<>();

    /** Output key instance reused across {@code map()} calls to avoid per-record allocation. */
    private final Text outKey = new Text();

    /**
     * Reads the first cache file line by line and fills the lookup table.
     *
     * @param context task context that supplies the registered cache file URIs
     * @throws IOException if no cache file is registered or the file cannot be read
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        URI[] cacheFiles = context.getCacheFiles();
        if (cacheFiles == null || cacheFiles.length == 0) {
            // Fail with a clear message instead of an NPE / index error.
            throw new IOException("No cache file registered for the map-side join");
        }
        String path = cacheFiles[0].getPath();

        // try-with-resources closes the reader (and the underlying stream) even on failure.
        // The charset is explicit so decoding does not depend on the platform default;
        // UTF-8 matches the encoding Hadoop's Text uses for the joined records.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] words = line.split(" ");
                if (words.length < 2) {
                    // Blank or short line (e.g. a trailing empty line): skip it rather than crash.
                    context.getCounter(COUNTER_GROUP, "MALFORMED_CACHE_LINES").increment(1);
                    continue;
                }
                lookup.put(words[0], words[1]);
            }
        }
    }

    /**
     * Joins one input record against the lookup table and writes the result.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one input record; expected to hold at least three space-separated fields
     * @param context task context used to emit the joined record
     * @throws IOException if writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] s = value.toString().split(" ");
        if (s.length < 3) {
            // Record does not have the three fields used below: count and skip it.
            context.getCounter(COUNTER_GROUP, "MALFORMED_INPUT_RECORDS").increment(1);
            return;
        }
        // NOTE(review): when s[1] has no entry in the lookup table, brand is null and the
        // literal text "null" is emitted, as before; confirm whether such rows should be dropped.
        String brand = lookup.get(s[1]);
        outKey.set(s[0] + "\t" + brand + "\t" + s[2]);
        context.write(outKey, NullWritable.get());
    }
}

Driver

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.File;
import java.net.URI;

/**
 * Driver for the map-side join job.
 *
 * <p>Removes a previous local output directory if present, then configures and
 * runs a map-only job that uses {@code map} as its mapper and registers the
 * small table as a cache file.
 */
public class dri {
    /** Local output directory; it is deleted before the job runs and then used as the job output path. */
    private static final String OUTPUT_DIR = "D:\\MapJoin\\output";

    /**
     * Entry point: clears the old output directory (if any) and runs the job.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        File file = new File(OUTPUT_DIR);
        if (file.exists()) {
            delFile(file);
        }
        driver();
    }

    /**
     * Recursively deletes {@code file}: children first, then the entry itself.
     *
     * @param file file or directory to delete
     */
    public static void delFile(File file) {
        File[] files = file.listFiles();
        // listFiles() returns null for a regular file, so there is nothing to recurse into.
        if (files != null) {
            for (File child : files) {
                delFile(child);
            }
        }
        if (!file.delete() && file.exists()) {
            // Report the failure instead of silently ignoring it; deletion stays best-effort.
            System.err.println("Failed to delete " + file.getAbsolutePath());
        }
    }

    /**
     * Configures the map-side join job, waits for it to finish, and exits the JVM
     * with status 0 on success or 1 on failure.
     *
     * @throws Exception if the job cannot be configured or submitted
     */
    public  static void driver() throws Exception{
        Job job = Job.getInstance(new Configuration());

        job.setJarByClass(dri.class);
        job.setMapperClass(map.class);

        // A map-side join has no reduce phase: with zero reduce tasks the mapper
        // output is written directly, with no shuffle/sort and no identity reducer.
        job.setNumReduceTasks(0);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        // In a map-only job the mapper output is the final output, so declare its types too.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Small table, read by the mapper's setup() from the cache file list.
        job.addCacheFile(new URI("file:///D:/MapJoin/input/pd.txt"));

        FileInputFormat.setInputPaths(job, "D:\\MapJoin\\input\\order.txt");
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_DIR));

        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}