MapReduce的分区-Partitioner

MapReduce的分区-Partitioner

分区操作是shuffle操作中的一个重要过程,作用就是将map的结果按照规则分发到不同的reduce中进行处理,从而按照分区得到多个输出结果。
Partitioner是partitioner的基类,如果需要定制partitioner也需要继承该类。HashPartitioner是MapReduce的默认partitioner,计算方法是
which reducer=(key.hashCode() & Integer.MAX_VALUE) % numReduceTasks
注:默认情况下reduceTask数量为1。很多时候MR自带的分区规则并不能满足我们的需求,为了实现特定的效果,需要自己来定义分区规则。
案例:根据城市区分,来统计每一个城市中每个人产生的流量

public class FlowMapper extends Mapper<LongWritable, Text, Text, Flow> {
	public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

		String line = value.toString();
		String[] arr = line.split(" ");
		Flow f = new Flow();
		f.setPhone(arr[0]);
		f.setCity(arr[1]);
		f.setName(arr[2]);
		f.setFlow(Integer.parseInt(arr[3]));
		context.write(new Text(f.getPhone()), f);
	}
}

/**
 * Custom partitioner that routes records by the city stored in the
 * {@link Flow} value rather than by the key's hash.
 *
 * <p>{@code "bj"} maps to slot 0, {@code "sh"} to slot 1, and every other
 * city (including a missing one) to slot 2. The slot is then reduced modulo
 * {@code numPartitions}, so the result is unchanged when the job runs with
 * three or more reducers and stays in range when it runs with fewer.
 */
public class FlowPartitioner extends Partitioner<Text, Flow> {

	/**
	 * Chooses the partition for one map output record.
	 *
	 * @param key           map output key; not used for routing
	 * @param value         map output value whose city decides the partition
	 * @param numPartitions number of partitions configured for the job
	 * @return a partition index in {@code [0, numPartitions)}
	 */
	@Override
	public int getPartition(Text key, Flow value, int numPartitions) {
		String city = value.getCity();

		int slot;
		// Literal-first equals: a null city falls through to the catch-all slot
		// instead of throwing NullPointerException.
		if ("bj".equals(city)) {
			slot = 0;
		} else if ("sh".equals(city)) {
			slot = 1;
		} else {
			slot = 2;
		}

		// Keep the index valid even if the job is configured with fewer than
		// three reduce tasks.
		return slot % numPartitions;
	}
}

public class FlowReducer extends Reducer<Text, Flow, Text, IntWritable> {

	public void reduce(Text key, Iterable<Flow> values, Context context) throws IOException, InterruptedException {
		
		int sum = 0;
		
		for (Flow val : values) {
			sum += val.getFlow();
		}
		context.write(key, new IntWritable(sum));
	}

}
/**
 * Entry point that configures and submits the flow-per-city job: wires up
 * {@code FlowMapper}, {@code FlowPartitioner} and {@code FlowReducer}, runs
 * the job, and exits with a status reflecting whether it succeeded.
 */
public class FlowDriver {

	/** Input path used when none is given on the command line. */
	private static final String DEFAULT_INPUT = "hdfs://192.168.60.132:9000/mr/flow.txt";

	/** Output path used when none is given on the command line. */
	private static final String DEFAULT_OUTPUT = "hdfs://192.168.60.132:9000/fpresult";

	/** Number of reduce tasks; FlowPartitioner routes records to slots 0, 1 and 2. */
	private static final int REDUCE_TASKS = 3;

	/**
	 * Builds and runs the job.
	 *
	 * @param args optional {@code <input> <output>} paths; when fewer than two
	 *             arguments are supplied the built-in default paths are used
	 * @throws Exception if job setup or submission fails
	 */
	public static void main(String[] args) throws Exception {
		String input = args.length >= 2 ? args[0] : DEFAULT_INPUT;
		String output = args.length >= 2 ? args[1] : DEFAULT_OUTPUT;

		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf, "JobName");
		job.setJarByClass(FlowDriver.class);
		job.setMapperClass(FlowMapper.class);
		job.setReducerClass(FlowReducer.class);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Flow.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);

		// Use the custom partitioner, with one reduce task per partition slot.
		job.setPartitionerClass(FlowPartitioner.class);
		job.setNumReduceTasks(REDUCE_TASKS);

		FileInputFormat.setInputPaths(job, new Path(input));
		FileOutputFormat.setOutputPath(job, new Path(output));

		// Propagate the job result so callers and scripts can detect failure.
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

}