

一. OutputFormat接口实现类


1.1 文本输出TextOutputFormat


1.2 SequenceFileOutputFormat


1.3 自定义OutputFormat


二. 自定义OutputFormat的使用场景和步骤

2.1 使用场景



2.2 自定义OutputFormat步骤



三. 举例操作

3.1 需求


3.2 需求分析

  1. 需求: 过滤输入的log日志,包含buwenbuhuo的网站输出到 d:/buwenbuhuo.log,不包含buwenbuhuo的网站输出到 d:/other.log。
3.3 编写代码

  • 1. 自定义一个MyOutputFormat类
package com.buwenbuhuo.outputformat;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
* @author 卜温不火
* @create 2020-04-25 16:37
* com.buwenbuhuo.outputformat - the name of the target package where the new class or interface will be created.
* mapreduce0422 - the name of the current project.
public class MyOutputFormat extends FileOutputFormat<LongWritable, Text> {
public RecordWriter<LongWritable, Text> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
MyRecordWriter myRecordWriter = new MyRecordWriter();
return myRecordWriter;
  • 2. 编写MyRecordWriter类
package com.buwenbuhuo.outputformat;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
* @author 卜温不火
* @create 2020-04-25 16:37
* com.buwenbuhuo.outputformat - the name of the target package where the new class or interface will be created.
* mapreduce0422 - the name of the current project.
public class MyRecordWriter extends RecordWriter<LongWritable, Text> {

private FSDataOutputStream buwenbuhuo;
private FSDataOutputStream other;

* 初始化方法
* @param job
public void initialize(TaskAttemptContext job) throws IOException {
String outdir = job.getConfiguration().get(FileOutputFormat.OUTDIR);
FileSystem fileSystem = FileSystem.get(job.getConfiguration());
buwenbuhuo = fileSystem.create(new Path(outdir + "/buwenbuhuo.log"));
other = fileSystem.create(new Path(outdir + "/other.log"));

* 将KV写出,每对KV调用一次
* @param key
* @param value
* @throws IOException
* @throws InterruptedException
public void write(LongWritable key, Text value) throws IOException, InterruptedException {
String out = value.toString() + "\n";
if (out.contains("buwenbuhuo")) {
} else {

* 关闭资源
* @param context
* @throws IOException
* @throws InterruptedException
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  • 3. 编写OutputDriver类
package com.buwenbuhuo.outputformat;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
* @author 卜温不火
* @create 2020-04-25 16:37
* com.buwenbuhuo.outputformat - the name of the target package where the new class or interface will be created.
* mapreduce0422 - the name of the current project.
public class OutputDriver {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

Job job = Job.getInstance(new Configuration());


FileInputFormat.setInputPaths(job, new Path("d:\\input"));
FileOutputFormat.setOutputPath(job, new Path("d:\\output"));

boolean b = job.waitForCompletion(true);
System.exit(b ? 0 : 1);

3.4 运行及结果

  • 1. 运行
  • 2. 结果
