=============hadoop-0.12.2-core 版本===========================
MyMap.java
map方法把文本文件的单词输出到中间过程output中,格式:<key,value>
Hadoop 1
Bye 1
Hadoop 1
World 1
// Old (mapred) API word-count mapper: for each whitespace-separated token
// in the input line, emits the pair <word, 1> to the intermediate output.
public class MyMap extends MapReduceBase implements Mapper {
    // Reused singleton count; every emitted pair carries the value 1.
    private final static IntWritable one = new IntWritable(1);
    // Reused key holder to avoid allocating a Text per token.
    private Text word = new Text();

    /**
     * Tokenizes one line of input and emits <token, 1> per token.
     *
     * @param key      input key (byte offset of the line — TODO confirm, depends on InputFormat)
     * @param value    one line of input text
     * @param output   collector receiving the intermediate <word, 1> pairs
     * @param reporter progress reporter (unused here)
     * @throws IOException if the collector fails
     */
    @Override
    public void map(WritableComparable key,
                    Writable value, OutputCollector output,
                    Reporter reporter) throws IOException {
        String line = value.toString();
        StringTokenizer stz = new StringTokenizer(line);
        while (stz.hasMoreTokens()) {
            word.set(stz.nextToken());
            output.collect(word, one);
        }
    }
}
MyReduce.java
reduce方法
遍历values 就可以得到同一个key的所有value
// Old (mapred) API word-count reducer: sums all 1-counts emitted for a key.
public class MyReduce extends MapReduceBase implements Reducer {
    /**
     * Iterates over all values of one key and emits <key, sum>.
     *
     * @param key      the word
     * @param values   iterator over the IntWritable partial counts for this key
     * @param output   collector receiving the final <word, count> pair
     * @param reporter progress reporter (unused here)
     * @throws IOException if the collector fails
     */
    public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter) throws IOException {
        int sum = 0;
        while (values.hasNext()) {
            // FIX: read the count directly instead of the fragile
            // Integer.parseInt(values.next().toString()) round-trip through String.
            sum += ((IntWritable) values.next()).get();
        }
        output.collect(key, new IntWritable(sum));
    }
}
任务,主调方法
public class JobTest{
public int run(String... args) throws IOException{
JobConf conf = new JobConf(new Configuration());
conf.setJobName("wordCount");
conf.setInputPath(new Path(args[0]));
conf.setOutputPath(new Path(args[1]));
conf.setMapperClass(MyMap.class);
conf.setReducerClass(MyReduce.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
JobClient.runJob(conf);
return 0;
}
public static void main(String[] args){
try {
new JobTest().run("D:\\files\\wordCount.txt","D:\\files\\wordCoutOut");
} catch (IOException e) {
e.printStackTrace();
}
}
打开D:\files\wordCoutOut\part-00000文件如下结果:
Bye 3
Hadoop 4
Hello 3
World 2
===========hadoop-0.20.2-core版本========================
MyMap.java
public class MyMap extends Mapper<Object, Text, Text, IntWritable> {
Text t = new Text();
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
//output 和reporter 都集成到Context 中
StringTokenizer itr = new StringTokenizer(value.toString());
while(itr.hasMoreTokens()){
word.set(itr.nextToken());
context.write(word, one);
}
}
}
MyReduce.java
// New (mapreduce) API word-count reducer: totals the 1-counts per word.
public class MyReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    // Reused output holder to avoid allocating an IntWritable per key.
    private IntWritable result = new IntWritable();

    /**
     * Sums every partial count emitted for {@code key} and writes
     * the pair <key, total> to the job output.
     *
     * @param key     the word
     * @param values  all partial counts collected for this word
     * @param context sink for the final <word, total> pair
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable partial : values) {
            total += partial.get();
        }
        result.set(total);
        context.write(key, result);
    }
}
JobTest.java
public class JobTest{
public int run(String... args) throws IOException, InterruptedException, ClassNotFoundException{
Job job = new Job(new Configuration(),"word count");
job.setJarByClass(JobTest.class);
job.setMapperClass(MyMap.class);
job.setCombinerClass(MyReduce.class);
job.setReducerClass(MyReduce.class);
job.setOutputKeyClass(Text.class);//设置reduce输出Key 类型
job.setOutputValueClass(IntWritable.class);//设置输出value 类型
FileInputFormat.addInputPath(job, new Path(args[0]));//设置输入路径
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true)?0:1);
return 0;
}
public static void main(String[] args) throws InterruptedException, ClassNotFoundException{
try {
new JobTest().run("D:\\files\\wordCount.txt","D:\\files\\wordCoutOut");
} catch (IOException e) {
e.printStackTrace();
}
}