Big Data with Hadoop: Computing Student Average Scores
Dataset for this post: https://download.****.net/download/FRESHET/12649664
1. The Mapper class
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class AvgMapper extends Mapper<Object, Text, Text, IntWritable> {

    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // Each input line holds whitespace-separated "name score" pairs.
        String line = value.toString();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            String name = tokenizer.nextToken();
            int score = Integer.parseInt(tokenizer.nextToken());
            // Emit <name, score> so the reducer sees every score per student.
            context.write(new Text(name), new IntWritable(score));
        }
    }
}
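The mapper consumes tokens in pairs, so each input file is expected to hold one "name score" pair per line. The linked dataset is the real input; the rows below are made-up placeholders, shown only to illustrate the expected shape of a file like score1.txt:

Alice 88
Bob 72
Chen 95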
2. The Reducer class
import java.io.IOException;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class AvgReduce extends Reducer<Text, IntWritable, Text, FloatWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int cnt = 0;
        int sum = 0;
        // Total up all scores for this student and count how many there are.
        for (IntWritable score : values) {
            sum += score.get();
            cnt++;
        }
        // Cast before dividing: sum / cnt alone is integer division and
        // would silently truncate the average.
        float avg = (float) sum / cnt;
        context.write(key, new FloatWritable(avg));
    }
}
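One caveat: AvgReduce cannot also be registered as a combiner via job.setCombinerClass(...), because an average of partial averages is not the overall average. For example, if one split holds {90, 80} and another holds {70}, combining per split gives avg(85, 70) = 77.5, while the true average of 90, 80, 70 is 80. A combiner for this job would have to emit partial (sum, count) pairs instead.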
3. The driver class is a bit more involved
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.lu.map.AvgMapper;
import com.lu.red.AvgReduce;

public class Avg {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "avg count");
        job.setJarByClass(Avg.class);
        job.setMapperClass(AvgMapper.class);   // Mapper for this job
        job.setReducerClass(AvgReduce.class);  // Reducer for this job

        // These four calls are required here: the map output value type
        // (IntWritable) differs from the final output value type
        // (FloatWritable), matching Mapper<Object, Text, Text, IntWritable>
        // and Reducer<Text, IntWritable, Text, FloatWritable>.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FloatWritable.class); // FloatWritable for the averages

        Path in1 = new Path("hdfs://192.168.1.27:9000/test/wordcnt/in/score1.txt");
        Path in2 = new Path("hdfs://192.168.1.27:9000/test/wordcnt/in/score2.txt");
        Path in3 = new Path("hdfs://192.168.1.27:9000/test/wordcnt/in/score3.txt");
        Path in4 = new Path("hdfs://192.168.1.27:9000/test/wordcnt/in/score4.txt");
        Path in5 = new Path("hdfs://192.168.1.27:9000/test/wordcnt/in/score5.txt");
        Path out = new Path("hdfs://192.168.1.27:9000/test/wordcnt/out/4");

        FileInputFormat.addInputPath(job, in1);
        FileInputFormat.addInputPath(job, in2);
        FileInputFormat.addInputPath(job, in3);
        FileInputFormat.addInputPath(job, in4);
        FileInputFormat.addInputPath(job, in5);
        FileOutputFormat.setOutputPath(job, out);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
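Assuming the three classes are packaged into a jar (the name avg.jar below is arbitrary), the job can be submitted and its output inspected from the command line:

hadoop jar avg.jar Avg
hdfs dfs -cat hdfs://192.168.1.27:9000/test/wordcnt/out/4/part-r-00000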
The aggregated results list each student's average score as a percentage.