题目及数据:
computer,huangxiaoming,85,86,41,75,93,42,85
computer,xuzheng,54,52,86,91,42
computer,huangbo,85,42,96,38
english,zhaobenshan,54,52,86,91,42,85,75
english,liuyifei,85,41,75,21,85,96,14
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
algorithm,huangdatou,48,58,67,86,15,33,85
algorithm,huangzitao,85,86,41,75,93,42,85,75
一、数据解释
数据字段个数不固定:
第一个是课程名称,总共四个课程,computer,math,english,algorithm,
第二个是学生姓名,后面是每次考试的分数
二、统计需求:
1、统计每门课程的参考人数和课程平均分
2、统计每门课程参考学生的平均分,并且按课程存入不同的结果文件,要求一门课程一个结果文件,并且按平均分从高到低排序,分数保留一位小数
3、求出每门课程参考学生平均分最高的学生的信息:课程,姓名和平均分
题目解析:1、课程平均分需要在map中先计算每个人的课程平均成绩,然后在reduce中求出整体的平均成绩
/**
* @author: lpj
* @date: 2018年3月16日 下午7:16:47
* @Description:
*/
package lpj.reduceWork;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
*
*/
public class StudentScore3MR {
    /**
     * Requirement 1: for each course, report the number of participating
     * students and the course average score.
     *
     * Pipeline: the mapper emits (course, per-student average); the reducer
     * counts those students and averages their averages. Output line format:
     * course \t studentCount \t courseAverage
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
//        conf.addResource("hdfs-site.xml");                 // load an explicit config file
//        System.setProperty("HADOOP_USER_NAME", "hadoop");  // run against the cluster
        FileSystem fs = FileSystem.get(conf); // defaults to the local file system
        Job job = Job.getInstance(conf);
        job.setJarByClass(StudentScore3MR.class);
        job.setMapperClass(StudentScore3MR_Mapper.class);
        job.setReducerClass(StudentScore3MR_Reducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
//        String inputpath = args[0];
//        String outpath = args[1];
        Path inputPath = new Path("d:/a/homework6.txt");
        Path outputPath = new Path("d:/a/homework6");
        // BUG FIX: the original tested fs.exists(inputPath) before deleting the
        // OUTPUT directory. Guard on the path that is actually being deleted,
        // otherwise a stale output dir makes the job fail when input is missing
        // and the check never protects anything.
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        boolean isdone = job.waitForCompletion(true);
        System.exit(isdone ? 0 : 1);
    }

    /**
     * Parses one CSV line "course,name,score1,score2,..." and emits
     * (course, thisStudentAverage). Field count is variable; scores start at
     * index 2.
     */
    public static class StudentScore3MR_Mapper extends Mapper<LongWritable, Text, Text, Text> {
        Text kout = new Text();
        Text valueout = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // e.g. algorithm,huangzitao,85,86,41,75,93,42,85,75
            String[] reads = value.toString().trim().split(",");
            String kk = reads[0];
            int sum = 0;
            int count = 0;
            for (int i = 2; i < reads.length; i++) {
                sum += Integer.parseInt(reads[i]);
                count++;
            }
            double avg = 1.0 * sum / count;
            kout.set(kk);
            valueout.set(Double.toString(avg));
            context.write(kout, valueout);
        }
    }

    /**
     * Receives all per-student averages for one course; outputs the student
     * count and the mean of those averages (the course average).
     */
    public static class StudentScore3MR_Reducer extends Reducer<Text, Text, Text, Text> {
        Text valueout = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            double sum = 0;
            int count = 0;
            for (Text text : values) {
                sum += Double.parseDouble(text.toString());
                count++;
            }
            double avg = sum / count;
            valueout.set(count + "\t" + avg);
            context.write(key, valueout);
        }
    }
}
结果:
algorithm 6 71.60119047619047
computer 10 69.79896825396825
english 9 66.22655122655122
math 7 72.88265306122449
2、输出结果存储到不同的结果文件中,需要指定setNumReduceTasks,分区规则通过使用partitioner进行分区设定,平均成绩需要进行排序,可以使用封装对象的方式,通过实现WritableComparable接口进行设置排序规则
实体类定义:
/**
* @author: lpj
* @date: 2018年3月14日 下午9:46:02
* @Description:
*/
package lpj.day2.homeworkbean;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import org.apache.hadoop.io.WritableComparable;
/**
*
*/
public class Student implements WritableComparable<Student> {
    // Per-course record: one student's average score in one course.
    private String name;
    private double score;   // average score, compared in descending order
    private String course;

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public double getScore() {
        return score;
    }

    public void setScore(double score) {
        this.score = score;
    }

    public String getCourse() {
        return course;
    }

    public void setCourse(String course) {
        this.course = course;
    }

    /**
     * Output line format: course \t name \t score.
     * Pattern "0.0" always keeps exactly one decimal place (85.0 -> "85.0"),
     * as the assignment requires; "#.#" would print 85.0 as "85".
     */
    @Override
    public String toString() {
        DecimalFormat fs = new DecimalFormat("0.0");
        return course + "\t" + name + "\t" + fs.format(score);
    }

    public Student() {
    }

    public Student(String name, double score, String course) {
        super();
        this.name = name;
        this.score = score;
        this.course = course;
    }

    /**
     * Sort order: course ascending, then score descending within a course.
     * BUG FIX: the original used (int)(o.score - this.score), which truncates
     * any gap smaller than 1.0 to 0 (85.5 vs 85.2 compared equal), producing
     * a wrong sort. Double.compare is exact and overflow-free.
     */
    @Override
    public int compareTo(Student o) {
        int diff = this.course.compareTo(o.course);
        if (diff != 0) {
            return diff;
        }
        return Double.compare(o.score, this.score); // descending by score
    }

    /** Deserialization order must mirror write(): name, score, course. */
    @Override
    public void readFields(DataInput in) throws IOException {
        name = in.readUTF();
        score = in.readDouble();
        course = in.readUTF();
    }

    /** Serialization order must mirror readFields(): name, score, course. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(name);
        out.writeDouble(score);
        out.writeUTF(course);
    }
}
分区器定义:
/**
* @author: lpj
* @date: 2018年3月16日 下午10:13:24
* @Description:
*/
package lpj.reduceWorkbean;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
*
*/
public class MyPatitioner extends Partitioner<Student, NullWritable> {
    /**
     * Routes each record to a reducer by course so every course lands in its
     * own output file (pair with job.setNumReduceTasks(4)).
     *
     * Mapping: math -> 0, english -> 1, computer -> 2, anything else
     * (algorithm) -> 3 — same mapping as before.
     */
    @Override
    public int getPartition(Student key, NullWritable value, int numPartitions) {
        // Read the course field directly. The original matched on
        // key.toString().startsWith(...), which silently breaks if
        // Student.toString() ever changes its format.
        String course = key.getCourse();
        if ("math".equals(course)) {
            return 0;
        } else if ("english".equals(course)) {
            return 1;
        } else if ("computer".equals(course)) {
            return 2;
        } else {
            return 3;
        }
    }
}
主体程序:
/**
* @author: lpj
* @date: 2018年3月16日 下午7:16:47
* @Description:
*/
package lpj.reduceWork;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import lpj.reduceWorkbean.MyPatitioner;
import lpj.reduceWorkbean.Student;
/**
*
*/
public class StudentScore3_2MR2 {
    /**
     * Requirement 2: per-student course averages, one output file per course,
     * sorted by average descending, one decimal place.
     *
     * Sorting comes from Student.compareTo (course asc, score desc); the
     * one-file-per-course split comes from MyPatitioner + 4 reduce tasks.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
//        conf.addResource("hdfs-site.xml");                 // load an explicit config file
//        System.setProperty("HADOOP_USER_NAME", "hadoop");  // run against the cluster
        FileSystem fs = FileSystem.get(conf); // defaults to the local file system
        Job job = Job.getInstance(conf);
        job.setJarByClass(StudentScore3_2MR2.class);
        job.setMapperClass(StudentScore3MR_Mapper.class);
        job.setReducerClass(StudentScore3MR_Reducer.class);
        job.setMapOutputKeyClass(Student.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Student.class);
        job.setOutputValueClass(NullWritable.class);
        job.setPartitionerClass(MyPatitioner.class); // one partition per course
        job.setNumReduceTasks(4);                    // one reducer -> one result file each
//        String inputpath = args[0];
//        String outpath = args[1];
        Path inputPath = new Path("d:/a/homework6.txt");
        Path outputPath = new Path("d:/a/homework6_2");
        // BUG FIX: guard on the OUTPUT path (the one being deleted), not the
        // input path as the original did.
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        boolean isdone = job.waitForCompletion(true);
        System.exit(isdone ? 0 : 1);
    }

    /**
     * Parses "course,name,score1,..." and emits a Student key holding the
     * course, the name, and that student's average; value is NullWritable.
     */
    public static class StudentScore3MR_Mapper extends Mapper<LongWritable, Text, Student, NullWritable> {
        // Reused across calls; safe because the framework serializes the key
        // before the next map() invocation.
        Student stu = new Student();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // e.g. algorithm,huangzitao,85,86,41,75,93,42,85,75
            String[] reads = value.toString().trim().split(",");
            int sum = 0;
            int count = 0;
            for (int i = 2; i < reads.length; i++) {
                sum += Integer.parseInt(reads[i]);
                count++;
            }
            stu.setCourse(reads[0]);
            stu.setName(reads[1]);
            stu.setScore(1.0 * sum / count);
            context.write(stu, NullWritable.get());
        }
    }

    /**
     * Identity reducer: keys arrive already sorted (course asc, score desc),
     * so writing them through preserves the required order in each file.
     */
    public static class StudentScore3MR_Reducer extends Reducer<Student, NullWritable, Student, NullWritable> {
        @Override
        protected void reduce(Student key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }
}
3、题目涉及排序以及分组,分组使用WritableComparator,进行分组字段设置。其中需要注意的是分组字段与排序字段的关系:分组字段一定是排序字段中的前几个
举例:排序规则:a,b,c,d,e。那么分组规则就只能是以下情况中的任意一种:
a / a,b / a,b,c / a,b,c,d / a,b,c,d,e 不能跳跃
排序字段一定大于等于分组字段,并且包含分组字段
使用分组组件进行:
实体类如题2
分组类代码:
/**
* @author: lpj
* @date: 2018年3月16日 下午10:36:55
* @Description:
*/
package lpj.reduceWorkbean;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
*
*/
public class MyGroup extends WritableComparator {
    /**
     * Grouping comparator: two Student keys belong to the same reduce group
     * when their courses are equal. Combined with the full sort (course asc,
     * score desc), the first key seen in each group is that course's
     * highest-scoring student.
     */
    public MyGroup() {
        // true -> let the parent create Student instances for comparison
        super(Student.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        Student left = (Student) a;
        Student right = (Student) b;
        // Group on the course field only; score and name are ignored here.
        return left.getCourse().compareTo(right.getCourse());
    }
}
主体类代码;
/**
* @author: lpj
* @date: 2018年3月16日 下午7:16:47
* @Description:
*/
package lpj.reduceWork;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import lpj.reduceWorkbean.MyGroup;
import lpj.reduceWorkbean.MyPatitioner;
import lpj.reduceWorkbean.Student;
/**
*
*/
public class StudentScore3_3MR3 {
    /**
     * Requirement 3: the top-average student per course (course, name,
     * average).
     *
     * Keys sort by course asc then score desc; MyGroup groups a whole course
     * into one reduce call, whose key at entry is therefore the course
     * maximum. The reducer writes only that first key.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
//        conf.addResource("hdfs-site.xml");                 // load an explicit config file
//        System.setProperty("HADOOP_USER_NAME", "hadoop");  // run against the cluster
        FileSystem fs = FileSystem.get(conf); // defaults to the local file system
        Job job = Job.getInstance(conf);
        job.setJarByClass(StudentScore3_3MR3.class);
        job.setMapperClass(StudentScore3MR_Mapper.class);
        job.setReducerClass(StudentScore3MR_Reducer.class);
        job.setMapOutputKeyClass(Student.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Student.class);
        job.setOutputValueClass(NullWritable.class);
        job.setGroupingComparatorClass(MyGroup.class); // group by course only
        Path inputPath = new Path("d:/a/homework6.txt");
        Path outputPath = new Path("d:/a/homework6_3");
        // BUG FIX: guard on the OUTPUT path (the one being deleted), not the
        // input path as the original did.
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        boolean isdone = job.waitForCompletion(true);
        System.exit(isdone ? 0 : 1);
    }

    /**
     * Parses "course,name,score1,..." and emits a Student key holding the
     * course, the name, and that student's average; value is NullWritable.
     */
    public static class StudentScore3MR_Mapper extends Mapper<LongWritable, Text, Student, NullWritable> {
        // Reused across calls; safe because the framework serializes the key
        // before the next map() invocation.
        Student stu = new Student();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // e.g. algorithm,huangzitao,85,86,41,75,93,42,85,75
            String[] reads = value.toString().trim().split(",");
            int sum = 0;
            int count = 0;
            for (int i = 2; i < reads.length; i++) {
                sum += Integer.parseInt(reads[i]);
                count++;
            }
            stu.setCourse(reads[0]);
            stu.setName(reads[1]);
            stu.setScore(1.0 * sum / count);
            context.write(stu, NullWritable.get());
        }
    }

    /**
     * Writes only the group's first (i.e. highest-average) key without
     * iterating the values, yielding exactly one line per course.
     */
    public static class StudentScore3MR_Reducer extends Reducer<Student, NullWritable, Student, NullWritable> {
        @Override
        protected void reduce(Student key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }
}