import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;;
public class MaxTemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
private static final int MISSING=9999;
public void map(LongWritable key, Text value, Context context )
throws IOException, InterruptedException {
//将输入的Text值转换为java的String类型
String line=value.toString();
//用substring()方法提取我们感兴趣的列
String year=line.substring(15, 19);
int airTemperature;
if(line.charAt(87)=='+'){
airTemperature=Integer.parseInt(line.substring(88, 92));
}else{
airTemperature=Integer.parseInt(line.substring(87, 92));
}
String quality=line.substring(92, 93);
if(airTemperature!=MISSING&&quality.matches("[01459]")){
//输出写入内容
context.write(new Text(year), new IntWritable(airTemperature));
}
}
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
// Like the mapper, the reducer declares four type parameters for its input and
// output pairs; the reduce input types must match the map output types.
public class MaxTemperatureReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Emits the maximum temperature observed for the given year key.
     * The values parameter is an Iterable over every temperature mapped to this key.
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // Scan all readings for this key, keeping the largest seen so far.
        int max = Integer.MIN_VALUE;
        for (IntWritable reading : values) {
            int current = reading.get();
            if (current > max) {
                max = current;
            }
        }
        // Output must be expressed with Hadoop's own writable types.
        context.write(key, new IntWritable(max));
    }
}
import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver that configures and launches the MaxTemperature MapReduce job.
 */
public class App
{
    /**
     * Configures and submits the MaxTemperature job.
     *
     * Expects exactly two arguments: the input path and the output path.
     * Exits with 0 on job success, 1 on job failure, -1 on bad arguments.
     */
    public static void main( String[] args ) throws IOException, ClassNotFoundException, InterruptedException
    {
        if (args.length != 2) {
            // Usage errors go to stderr so they are not mixed into normal output.
            System.err.println("Usage: MaxTemperature <input path> <output path>");
            System.exit(-1);
        }
        // The Job object holds the whole job specification and controls the run.
        // (new Job() is deprecated in newer Hadoop in favor of Job.getInstance().)
        Job job = new Job();
        // On a cluster the code ships as a jar; passing a class here lets Hadoop
        // locate that jar without the file being named explicitly.
        job.setJarByClass(App.class);
        // Human-readable job name shown in the cluster UI.
        job.setJobName("Max temperature");
        // Input data path (a file or a directory of files).
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // BUG FIX: the output path must be registered via FileOutputFormat —
        // the original called FileInputFormat.addInputPath on args[1], which
        // added a second input and left the job with no output path at all.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Wire up the mapper and reducer implementations.
        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReduce.class);
        // Output types of the reduce function.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Block until the job finishes and propagate success/failure as exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}