题目描述

关于学生成绩的练习题,之前是一个入门级别的需求,现在对这些需求进行增强。首先看数据的变化:

computer,huangxiaoming,85,86,41,75,93,42,85
computer,xuzheng,54,52,86,91,42
computer,huangbo,85,42,96,38
english,zhaobenshan,54,52,86,91,42,85,75
english,liuyifei,85,41,75,21,85,96,14
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
algorithm,huangdatou,48,58,67,86,15,33,85
algorithm,huangzitao,85,86,41,75,93,42,85,75

一、数据解释

数据字段个数不固定:
第一个是课程名称,总共四门课程:computer、math、english、algorithm;
第二个是学生姓名,后面是每次考试的分数

 

二、统计需求:

1、统计每门课程的参考人数和课程平均分

2、统计每门课程参考学生的平均分,并且按课程存入不同的结果文件,要求一门课程一个结果文件,并且按平均分从高到低排序,分数保留一位小数

3、求出每门课程参考学生成绩最高的学生的信息:课程,姓名和平均分

 

三、解题思路

mapper阶段的输出:

key: CourseScore

value: NullWritable

reducer阶段的输出:

key:CourseScore

value:NullWritable

 

四、代码实现

package com.ghgj.mazh.mapreduce.exercise.coursescore3;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * MapReduce job that computes every student's average score per course and
 * writes one result file per course, ordered by average score from high to low.
 *
 * <p>The mapper emits a {@link CourseScore} key (course, name, average rounded
 * to one decimal) with a {@link NullWritable} value. Ordering relies on
 * {@code CourseScore.compareTo}, and {@link MyPartitioner} routes each course
 * to its own reduce task so that each output file holds a single course.
 *
 * <p>Usage: {@code CourseScoreMR_Pro_02 [inputPath [outputPath]]}. When an
 * argument is omitted the original hard-coded default path is used.
 */
public class CourseScoreMR_Pro_02 {

    /** Input directory used when no first command-line argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "D:\\bigdata\\coursescore2\\input";

    /** Output directory used when no second command-line argument is supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "D:\\bigdata\\coursescore2\\output2";

    /** One reduce task (and therefore one output file) per course. */
    private static final int COURSE_COUNT = 4;

    /**
     * Configures and submits the job, then exits with 0 on success and 1 on failure.
     *
     * @param args optional {@code [inputPath, outputPath]}; missing entries fall
     *             back to the built-in defaults
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        // Paths: command-line arguments win, otherwise keep the original defaults.
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        // Create the Job object.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Tell Hadoop which jar contains the job classes.
        job.setJarByClass(CourseScoreMR_Pro_02.class);

        // Mapper and reducer. The stock Reducer is used unchanged: no aggregation
        // is needed here, the reduce side only has to pass the sorted keys through.
        job.setMapperClass(Mapper_CS.class);
        job.setReducerClass(Reducer.class);

        // Map output types.
        job.setMapOutputKeyClass(CourseScore.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Final (reduce) output types.
        job.setOutputKeyClass(CourseScore.class);
        job.setOutputValueClass(NullWritable.class);

        // One reduce task per course, with the custom partitioner choosing the task.
        job.setNumReduceTasks(COURSE_COUNT);
        job.setPartitionerClass(MyPartitioner.class);

        // Input and output locations; a stale output directory is removed first
        // because the job refuses to start when the output path already exists.
        Path input = new Path(inputPath);
        Path output = new Path(outputPath);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        // Submit and wait for completion.
        boolean waitForCompletion = job.waitForCompletion(true);
        System.exit(waitForCompletion ? 0 : 1);
    }

    /**
     * Mapper component.
     *
     * <p>Input key: the {@link LongWritable} supplied by the input format (unused).<br>
     * Input value: one text line of the form {@code course,name,score1,score2,...}
     * with a variable number of integer scores.
     *
     * <p>Output key: a {@link CourseScore} holding the course, the student name and
     * the average score rounded to one decimal place.<br>
     * Output value: {@link NullWritable}.
     *
     * <p>Lines that do not contain at least one score, or that contain a score that
     * is not an integer, are skipped and counted instead of failing the task.
     */
    private static class Mapper_CS extends Mapper<LongWritable, Text, CourseScore, NullWritable> {

        /** Counter group under which skipped lines are reported. */
        private static final String COUNTER_GROUP = "CourseScore";

        /** Reused for every record to avoid allocating a new key per input line. */
        private final CourseScore keyOut = new CourseScore();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            String[] splits = value.toString().split(",");
            // Need course, name and at least one score; this also rejects blank lines
            // and avoids dividing by zero below.
            if (splits.length < 3) {
                context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
                return;
            }

            // Trim so stray spaces do not break the partitioner's course matching.
            String course = splits[0].trim();
            String name = splits[1].trim();

            int sum = 0;
            int num = splits.length - 2;
            try {
                for (int i = 2; i < splits.length; i++) {
                    sum += Integer.parseInt(splits[i].trim());
                }
            } catch (NumberFormatException e) {
                // A non-numeric score makes the whole record unusable; skip and count it.
                context.getCounter(COUNTER_GROUP, "NON_NUMERIC_SCORES").increment(1);
                return;
            }

            // Average rounded to one decimal place.
            double avgScore = Math.round(sum * 1D / num * 10) / 10D;

            keyOut.setCourse(course);
            keyOut.setName(name);
            keyOut.setScore(avgScore);

            context.write(keyOut, NullWritable.get());
        }
    }

    /**
     * Custom partitioner that sends each course to a fixed reduce task:
     * computer to 0, english to 1, algorithm to 2, and everything else
     * (math in the sample data) to 3.
     */
    public static class MyPartitioner extends Partitioner<CourseScore, NullWritable> {

        /**
         * @param courseScore   the map output key
         * @param nullWritable  the map output value (unused)
         * @param numPartitions the number of reduce tasks configured for the job
         * @return the partition index, always within {@code [0, numPartitions)}
         */
        @Override
        public int getPartition(CourseScore courseScore, NullWritable nullWritable, int numPartitions) {

            String course = courseScore.getCourse();
            int slot;
            if ("computer".equals(course)) {
                slot = 0;
            } else if ("english".equals(course)) {
                slot = 1;
            } else if ("algorithm".equals(course)) {
                slot = 2;
            } else {
                slot = 3;
            }
            // Fold into the valid range so a job configured with fewer than four
            // reduce tasks does not fail with an out-of-range partition. With four
            // or more reduce tasks the result is the slot itself.
            return slot % numPartitions;
        }
    }

}

其中 CourseScore类的实现:

package com.ghgj.mazh.mapreduce.exercise.coursescore3;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Map output key carrying a course name, a student name and that student's
 * average score in the course.
 *
 * <p>Natural ordering: course ascending, then score descending, then student
 * name ascending. The name tie-breaker keeps two different students with the
 * same course and the same average from comparing as equal, so they are never
 * treated as the same key and their relative order is deterministic.
 *
 * <p>{@code course} and {@code name} must be set before {@link #write} is
 * called; {@code course} must be set before {@link #compareTo} is called.
 */
public class CourseScore implements WritableComparable<CourseScore> {

    private String course;
    private String name;
    private double score;

    /**
     * @param course course name
     * @param name   student name
     * @param score  the student's average score in the course
     */
    public CourseScore(String course, String name, double score) {
        super();
        this.course = course;
        this.name = name;
        this.score = score;
    }

    /** No-argument constructor; fields are filled later by setters or {@link #readFields}. */
    public CourseScore() {
    }

    public String getCourse() {
        return course;
    }

    public void setCourse(String course) {
        this.course = course;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public double getScore() {
        return score;
    }

    public void setScore(double score) {
        this.score = score;
    }

    /**
     * Serializes the fields in the order course, name, score.
     *
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(course);
        out.writeUTF(name);
        out.writeDouble(score);
    }

    /**
     * Restores the fields in the same order {@link #write} emitted them.
     *
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.course = in.readUTF();
        this.name = in.readUTF();
        this.score = in.readDouble();
    }

    /**
     * Sort rule: course ascending, then score descending, then name ascending.
     * This method is used for ordering keys and, unless a separate grouping
     * comparator is configured, also decides which keys count as the same key.
     *
     * @param cs the key to compare with
     * @return a negative, zero or positive value as this key sorts before,
     *         equal to or after {@code cs}
     */
    @Override
    public int compareTo(CourseScore cs) {
        int byCourse = this.course.compareTo(cs.getCourse());
        if (byCourse != 0) {
            return byCourse;
        }
        // Arguments are swapped on purpose: the higher score sorts first.
        // Double.compare gives a total order, unlike testing the sign of a difference.
        int byScore = Double.compare(cs.getScore(), this.score);
        if (byScore != 0) {
            return byScore;
        }
        return compareNullable(this.name, cs.getName());
    }

    /** Compares two strings in natural order, placing {@code null} before any non-null value. */
    private static int compareNullable(String left, String right) {
        if (left == null) {
            return right == null ? 0 : -1;
        }
        if (right == null) {
            return 1;
        }
        return left.compareTo(right);
    }

    /** Two keys are equal when course, name and score all match, consistent with {@link #compareTo}. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof CourseScore)) {
            return false;
        }
        CourseScore other = (CourseScore) obj;
        return Double.compare(score, other.score) == 0
                && java.util.Objects.equals(course, other.course)
                && java.util.Objects.equals(name, other.name);
    }

    @Override
    public int hashCode() {
        return java.util.Objects.hash(course, name, score);
    }

    /** Renders the key as {@code course<TAB>name<TAB>score}, the line format of the result files. */
    @Override
    public String toString() {
        return course + "\t" + name + "\t" + score;
    }
}

 

五、执行结果

MapReduce--13--学生成绩(增强版)--需求2_MapReduce例题

MapReduce--13--学生成绩(增强版)--需求2_apache_02

MapReduce--13--学生成绩(增强版)--需求2_MapReduce例题_03

MapReduce--13--学生成绩(增强版)--需求2_MapReduce学习_04