Reduce-side join in MapReduce


Example: the input consists of two files. One represents a factory table with a factory-name column and an address-ID column; the other represents an address table with an address-name column and an address-ID column. The task is to find, from the input data, the correspondence between factory names and address names and to output a factory-name / address-name table.

Sample data

Figure: factory

Figure: address

Figure: output
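
Judging from the mapper below, each input file is space-separated text with a header line. As a rough sketch of the expected layout (the values here are hypothetical, not the exact contents of the figures):

factory:
factoryname addressID
Beijing Red Star 1
Guangzhou Honda 3

address:
addressID addressname
1 Beijing
3 Guangzhou

With this input the job would emit pairs such as "Beijing Red Star / Beijing" and "Guangzhou Honda / Guangzhou".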


package com.ccse.hadoop.join;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Reduce-side join
 * @author woshiccna
 */
public class MultiTableJoinApp {

    public static final String INPUT_PATH = "hdfs://chaoren1:9000/reducejoininput";
    public static final String OUTPUT_PATH = "hdfs://chaoren1:9000/reducejoinoutput";
    // counter used by the reducer to emit the header row only once
    public static int time = 0;

    public static void main(String[] args) throws IOException, URISyntaxException,
            ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // remove any existing output directory so the job can be rerun
        FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), conf);
        fileSystem.delete(new Path(OUTPUT_PATH), true);

        Job job = new Job(conf, MultiTableJoinApp.class.getSimpleName());
        job.setJarByClass(MultiTableJoinApp.class);

        FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        job.waitForCompletion(true);
    }

    /**
     * The mapper first determines whether an input line belongs to the left (factory)
     * or the right (address) table, then splits it into its two columns: the join
     * column (addressID) becomes the key, and the remaining column, prefixed with a
     * left/right table flag, becomes the value.
     * @author woshiccna
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            if (value != null) {
                String line = value.toString();
                // skip the header line of either file
                if (line.contains("factoryname") || line.contains("addressID")) {
                    return;
                }
                // find the split point: the index of the first digit (start of addressID)
                int i = 0;
                while (line.charAt(i) < '0' || line.charAt(i) > '9') {
                    i++;
                }
                if (line.charAt(0) < '0' || line.charAt(0) > '9') {
                    // left table (factory): "factoryname addressID"
                    int j = i - 1;
                    while (line.charAt(j) != ' ') {
                        j--;
                    }
                    // key = addressID, value = "1+" + factoryname
                    context.write(new Text(line.substring(i).trim()),
                            new Text("1+" + line.substring(0, j).trim()));
                } else {
                    // right table (address): "addressID addressname"
                    int j = i + 1;
                    while (line.charAt(j) != ' ') {
                        j++;
                    }
                    // key = addressID, value = "2+" + addressname
                    context.write(new Text(line.substring(0, j)),
                            new Text("2+" + line.substring(j).trim()));
                }
            }
        }
    }
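
    // For example, with records like those sketched above (hypothetical values):
    //   left-table line  "Beijing Red Star 1"  ->  key "1", value "1+Beijing Red Star"
    //   right-table line "1 Beijing"           ->  key "1", value "2+Beijing"
    // All records sharing an addressID therefore reach the same reduce() call, and the
    // "1+"/"2+" prefix tells the reducer which table each value came from.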

    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values,
                Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // write the header row exactly once (assumes a single reduce task, the default)
            if (time == 0) {
                context.write(new Text("factoryname"), new Text("addressname"));
                time++;
            }
            int factoryNum = 0; // number of factory records for this addressID
            int addressNum = 0; // number of address records for this addressID
            // buffers for the two sides; assumes at most 10 records per side per key
            String[] factory = new String[10];
            String[] address = new String[10];
            if (values != null) {
                // sort the tagged values back into factory names and address names
                for (Text value : values) {
                    if (value != null) {
                        String line = value.toString();
                        if (line.startsWith("1+")) {        // from the factory table
                            factory[factoryNum++] = line.substring(2);
                        } else if (line.startsWith("2+")) { // from the address table
                            address[addressNum++] = line.substring(2);
                        }
                    }
                }
                // cross product: pair every factory name with every address name for this key
                for (int i = 0; i < factoryNum; i++) {
                    for (int j = 0; j < addressNum; j++) {
                        context.write(new Text(factory[i]), new Text(address[j]));
                    }
                }
            }
        }
    }
}
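
The job can be packaged and run roughly as follows (a sketch: the jar name reducejoin.jar is a placeholder, and the local sample files are assumed to be named factory and address as in the figures):

hadoop fs -mkdir /reducejoininput
hadoop fs -put factory address /reducejoininput
hadoop jar reducejoin.jar com.ccse.hadoop.join.MultiTableJoinApp
hadoop fs -cat /reducejoinoutput/part-r-00000

The last command prints the joined factoryname/addressname table written by the single reducer.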