1 OutputFormat Interface Implementation Classes

[Figure: common implementation classes of the OutputFormat interface]
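The figure itself is not preserved, so here is a short recap from general Hadoop knowledge rather than from the lost image: TextOutputFormat is the default implementation and writes each key/value pair as one tab-separated text line; SequenceFileOutputFormat writes binary SequenceFiles, which are convenient as input to downstream MapReduce jobs; anything beyond that calls for a custom OutputFormat, the topic of the rest of this post. A minimal sketch of selecting a format on a Job (the class name DefaultFormatExample is illustrative, not from the original):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class DefaultFormatExample {

    public static void main(String[] args) throws Exception {

        Job job = Job.getInstance(new Configuration());

        // TextOutputFormat is already the default; setting it explicitly just
        // documents the choice. Swap in SequenceFileOutputFormat, or a custom
        // class like the one built below, to change the on-disk format.
        job.setOutputFormatClass(TextOutputFormat.class);
    }
}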

2 Custom OutputFormat

[Figure: customizing an OutputFormat]
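This figure is lost as well, but the steps it illustrated can be read off the case study below: customizing output takes (a) a subclass of FileOutputFormat that overrides getRecordWriter, and (b) a RecordWriter subclass whose write() and close() do the actual I/O. A bare skeleton with placeholder names (MyOutputFormat and MyRecordWriter are illustrative):

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Step 1: subclass FileOutputFormat and hand the framework a RecordWriter
class MyOutputFormat extends FileOutputFormat<Text, NullWritable> {

    @Override
    public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext job) {
        return new MyRecordWriter();
    }
}

// Step 2: the RecordWriter overrides write() and close(), which is where the
// actual output logic lives
class MyRecordWriter extends RecordWriter<Text, NullWritable> {

    @Override
    public void write(Text key, NullWritable value) {
        // write one record to the destination of your choice
    }

    @Override
    public void close(TaskAttemptContext context) {
        // release any streams opened earlier
    }
}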

3 Custom OutputFormat Case Study

1. Requirement

Filter an input log file: lines for sites whose URL contains atguigu go to e:/atguigu.log, and lines that do not contain atguigu go to e:/other.log.

(1) Input data

[Figure: sample input log file]

(2) Expected output data

[Figures: expected contents of e:/atguigu.log and e:/other.log]

2. Requirement Analysis

[Figure: requirement analysis for the custom OutputFormat case]

3. Implementation

(1) Write the FilterMapper class

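The original post collapses this class behind a "View Code" widget, so the snippet is not preserved. Below is a minimal sketch consistent with the driver settings further down (map output key Text, map output value NullWritable): the mapper simply forwards each input line as the key.

package com.atguigu.mapreduce.outputformat;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FilterMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // Forward the whole line as the key; NullWritable carries no payload
        context.write(value, NullWritable.get());
    }
}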

(2) Write the FilterReducer class

package com.atguigu.mapreduce.outputformat;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FilterReducer extends Reducer<Text, NullWritable, Text, NullWritable> {

    Text k = new Text();

    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {

        // 1. The whole log line arrives as the key
        String line = key.toString();

        // 2. Append a line terminator
        line = line + "\r\n";

        // 3. Set the output key
        k.set(line);

        // 4. Write the line once per occurrence; iterating over values keeps
        // duplicate input lines from being collapsed into a single output line
        for (NullWritable value : values) {
            context.write(k, NullWritable.get());
        }
    }
}

(3) Write a custom OutputFormat class

package com.atguigu.mapreduce.outputformat;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FilterOutputFormat extends FileOutputFormat<Text, NullWritable> {

    @Override
    public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {

        // Create and return the custom RecordWriter defined in the next step
        return new FilterRecordWriter(job);
    }
}

(4) Write the RecordWriter class

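This class is also hidden behind a "View Code" widget in the original. The sketch below is reconstructed from what the surrounding code pins down: FilterOutputFormat above constructs it with a TaskAttemptContext, and the requirement routes lines containing atguigu to e:/atguigu.log and everything else to e:/other.log. Treat it as an illustrative reconstruction, not the author's verbatim code.

package com.atguigu.mapreduce.outputformat;
import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class FilterRecordWriter extends RecordWriter<Text, NullWritable> {

    FSDataOutputStream atguiguOut = null;
    FSDataOutputStream otherOut = null;

    public FilterRecordWriter(TaskAttemptContext job) {

        try {
            // 1. Get the file system from the job configuration
            FileSystem fs = FileSystem.get(job.getConfiguration());

            // 2. Open one output stream per target file
            atguiguOut = fs.create(new Path("e:/atguigu.log"));
            otherOut = fs.create(new Path("e:/other.log"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    @Override
    public void write(Text key, NullWritable value) throws IOException, InterruptedException {

        // Route each record by whether the line contains "atguigu"
        if (key.toString().contains("atguigu")) {
            atguiguOut.write(key.toString().getBytes());
        } else {
            otherOut.write(key.toString().getBytes());
        }
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {

        // Close both streams
        IOUtils.closeStream(atguiguOut);
        IOUtils.closeStream(otherOut);
    }
}

Note that the writer does not append a separator itself: the reducer already added "\r\n" to every line before it reaches write().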

(5) Write the FilterDriver class

package com.atguigu.mapreduce.outputformat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FilterDriver {

    public static void main(String[] args) throws Exception {

        // Adjust the input and output paths to match your own machine
        args = new String[] { "e:/input/inputoutputformat", "e:/output2" };

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(FilterDriver.class);
        job.setMapperClass(FilterMapper.class);
        job.setReducerClass(FilterReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Register the custom output format component with the job
        job.setOutputFormatClass(FilterOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // Although we defined a custom OutputFormat, it extends
        // FileOutputFormat, which still writes a _SUCCESS marker file,
        // so an output directory must be specified anyway
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}