package lxkj.com.hadoop_02;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
// Mapper is a generic class with four type parameters: the input key (the line's byte offset as a long),
// the input value (one line of text), the output key, and the output value. Hadoop supplies its own
// serializable types, optimized for network transfer, in the org.apache.hadoop.io package.
public class MaxTemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // In the NCDC records a reading of 9999 flags a missing temperature.
    private static final int MISSING = 9999;

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Convert the input Text value to a Java String.
        String line = value.toString();
        // Extract the columns of interest with substring().
        String year = line.substring(15, 19);
        int airTemperature;
        if (line.charAt(87) == '+') {
            // Skip a leading '+' sign before parsing the temperature.
            airTemperature = Integer.parseInt(line.substring(88, 92));
        } else {
            airTemperature = Integer.parseInt(line.substring(87, 92));
        }
        String quality = line.substring(92, 93);
        // Emit (year, temperature) only for valid, non-missing readings.
        if (airTemperature != MISSING && quality.matches("[01459]")) {
            context.write(new Text(year), new IntWritable(airTemperature));
        }
    }
 

}
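The column offsets above can be sanity-checked without a cluster by driving the mapper with MRUnit. The test below is only a sketch: it assumes the MRUnit MapDriver (org.apache.hadoop.mrunit.mapreduce) and JUnit are on the classpath, and it feeds a synthetic fixed-width record whose contents are made up; only the offsets match the parsing logic above.

package lxkj.com.hadoop_02;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class MaxTemperatureMapperTest {
    @Test
    public void parsesValidRecord() throws IOException {
        // Synthetic 93-character line: year "1950" at offsets 15-18, '+' at offset 87,
        // temperature "0022" at offsets 88-91, quality flag '1' at offset 92.
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < 15; i++) sb.append('0');
        sb.append("1950");
        for (int i = 0; i < 68; i++) sb.append('0');
        sb.append("+0022").append('1');
        MapDriver.newMapDriver(new MaxTemperatureMapper())
                 .withInput(new LongWritable(0), new Text(sb.toString()))
                 .withOutput(new Text("1950"), new IntWritable(22))
                 .runTest();
    }
}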
package lxkj.com.hadoop_02;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
//The Reducer likewise takes four type parameters for its input and output key/value types; the reduce input types must match the map output types (Text, IntWritable).
public class MaxTemperatureReduce extends Reducer<Text, IntWritable, Text, IntWritable>{
    // For each key the framework hands over all of its values as an Iterable<IntWritable>,
    // conceptually a list of every temperature recorded for that year.
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int maxValue = Integer.MIN_VALUE;
        for (IntWritable value : values) {
            maxValue = Math.max(maxValue, value.get());
        }
        // The output types must again be Hadoop's writable types.
        context.write(key, new IntWritable(maxValue));
    }
   
}
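Because taking a maximum is commutative and associative, this same reducer can also serve as a combiner, reducing the amount of data shuffled between the map and reduce phases. As a sketch (not part of the original code), the driver below could register it with one extra call:

// Optional: reuse the reducer as a combiner, set in the driver (App.main).
job.setCombinerClass(MaxTemperatureReduce.class);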

package lxkj.com.hadoop_02;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver class: configures the max-temperature job and submits it.
 */
public class App
{
    public static void main( String[] args ) throws IOException, ClassNotFoundException, InterruptedException
    {
     if (args.length != 2) {
      System.err.println("Usage: MaxTemperature <input path> <output path>");
      System.exit(-1);
     }
     // The Job object specifies the job's configuration and gives control over how the whole job runs.
     Job job = Job.getInstance();
       // When running on a cluster the code is packaged into a JAR file. Rather than naming the JAR
       // explicitly, pass a class to setJarByClass() and Hadoop locates the JAR that contains it.
       job.setJarByClass(App.class);
       // Give the job a name.
       job.setJobName("Max temperature");
       // Input path for the job's data.
       FileInputFormat.addInputPath(job, new Path(args[0]));
       // Output path; a job has a single output directory, and it must not already exist.
       FileOutputFormat.setOutputPath(job, new Path(args[1]));
       // Set the mapper and reducer classes.
       job.setMapperClass(MaxTemperatureMapper.class);
       job.setReducerClass(MaxTemperatureReduce.class);
       // Output key/value types produced by the reduce function.
       job.setOutputKeyClass(Text.class);
       job.setOutputValueClass(IntWritable.class);
       System.exit(job.waitForCompletion(true)?0:1);
    }
}
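To run the job, package the three classes into a JAR and submit it with the hadoop launcher; the JAR name and HDFS paths below are only placeholders:

hadoop jar hadoop-02.jar lxkj.com.hadoop_02.App /user/hadoop/ncdc/input /user/hadoop/ncdc/output

The output directory must not exist beforehand; on success the per-year maximum temperatures appear in part-r-00000 files under it.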