MapReduce实践攻略
超详细入门级-WordCount
问题描述:
统计一个文件中,各种单词出现的次数
思路分析:
- 在map阶段,对每行数据调用一次map方法,对读取到的每行数据按空格进行切割,将分割得到的每个单词作为key,value的值给定为1传递给reduce
- 在reduce阶段,接收map传递过来的key和value,key值相同的为同一组,对每一组只调用一次reduce方法,将该组的所有value累加即可得到该单词出现的次数,最后以该组的key作为输出的key、累加结果作为输出的value
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
| public class WordCountMR2 extends Configured implements Tool { /** * KEYIN: 默认情况下,是mr框架所读到的一行文本的起始偏移量,Long, * 但是在hadoop中有自己的更精简的序列化接口,所以不直接用Long,而用LongWritable * VALUEIN:默认情况下,是mr框架所读到的一行文本的内容,String,同上,用Text * KEYOUT:是用户自定义逻辑处理完成之后输出数据中的key,在此处是单词,String,同上,用Text * VALUEOUT:是用户自定义逻辑处理完成之后输出数据中的value,在此处是单词次数,Integer,同上,用IntWritable */ public static class WCMapper extends Mapper<LongWritable,Text, Text, IntWritable> { /** * map阶段的业务逻辑就写在自定义的map()方法中 * maptask会对每一行输入数据调用一次我们自定义的map()方法 * context是上下文引用对象,传递输出值 */ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { Collections.list(new StringTokenizer(value.toString()," ")).stream().map(s -> ((String)s).trim()) .filter(s -> s.length() > 1).forEach(ExceptionConsumer.of(word -> context.write(new Text(word),new IntWritable(1)))); } } /** * KEYIN, VALUEIN对应mapper输出的KEYOUT,VALUEOUT类型对应 * KEYOUT, VALUEOUT是自定义reduce逻辑处理结果的输出数据类型 * KEYOUT是单词 * VLAUEOUT是总次数 */ public static class WCReducer extends Reducer<Text,IntWritable,Text,IntWritable> { /** * reduce阶段的业务逻辑就写在自定义的reduce()方法中 * reducetask会对所有相同的key调用一次reduce()方法 * context是上下文引用对象,传递输出值 */ @Override protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { //map阶段的输出是reduce阶段的输入,样式如下 //<helle,1><hello,1><helle,1><hello,1><helle,1><hello,1> //<tom,1><tom,1><tom,1> //<good,1> // int count = 0; // for (IntWritable value : values){ // count += value.get(); // } // context.write(key, new IntWritable(count)); IntWritable count = StreamSupport.stream(values.spliterator(), false).collect(Collectors.toSet()).stream() .reduce((a, b) -> new IntWritable(a.get() + b.get())).get(); context.write(key,count); } } @Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); //创建job实例对象 Job job = Job.getInstance(conf,"test_fun_wordcount2"); //指定本程序的jar包所在的本地路径 job.setJarByClass(this.getClass()); //指定本业务job要使用的mapper/Reducer业务类 
job.setMapperClass(WCMapper.class); job.setReducerClass(WCReducer.class); //指定mapper输出数据的kv类型 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); //指定最终输出的数据的kv类型 //注:不是setReduceOutput,因为有的时候只需要用到map,直接输出map的结果就可以 job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); //指定job的输入原始文件所在目录 job.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(job,new Path(conf.get("inpath"))); //指定job的输出结果所在目录 job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job,new Path(conf.get("outpath"))); //指定开启的reduce的数量 job.setNumReduceTasks(1); //将job中配置的相关参数,以及job所用的java类所在的jar包,提交给yarn去运行 return job.waitForCompletion(true) ? 0 : 1; } public static void main(String[] args) throws Exception{ ToolRunner.run(new WordCountMR2(),args); } }
|
去重-DuplicateRemoveMR
问题描述:
去掉列表中所有重复的值,不考虑顺序
思路分析:
将每一行的值按分隔符切开重新排序,然后再拼接起来作为key,value置为NullWritable类型,传递给reduce,reduce对相同的key只会输出一次,以此达到去重复的效果。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
| public class DuplicateRemoveMR extends Configured implements Tool { public static class DRMapper extends Mapper<LongWritable,Text, Text, NullWritable>{ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String str = Collections.list(new StringTokenizer(value.toString(), ",")).stream() .map(s -> ((String) s).trim()).filter(s -> s.length() > 1).sorted() .collect(Collectors.joining(",")); context.write(new Text(str), NullWritable.get()); } } public static class DRReducer extends Reducer<Text,NullWritable,Text,NullWritable>{ @Override protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException { context.write(key,NullWritable.get()); } } @Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Job job = Job.getInstance(conf,"dup_remove_xj"); job.setJarByClass(DuplicateRemoveMR.class); job.setMapperClass(DRMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setReducerClass(DRReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); TextInputFormat.addInputPath(job,new Path(conf.get("inpath"))); TextOutputFormat.setOutputPath(job,new Path(conf.get("outpath"))); job.setNumReduceTasks(1); return job.waitForCompletion(true)? 0 : 1; } public static void main(String[] args) throws Exception{ ToolRunner.run(new DuplicateRemoveMR(),args); } }
|
倒置索引-InvertIndexMR
问题描述:
统计不同文件中单词出现的次数,还要输出该单词存在于哪些文件中
思路分析:
输入的每一行按分隔符切割成一个个单词,作为key,当前文件名作为value传递给reduce;在reduce阶段,对同一个单词按文件名分组,统计它在每个文件中出现的次数,并拼接成“文件名:次数”的形式,最后将单词作为key、拼接后的结果作为value输出。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
| public class InvertIndexMR extends Configured implements Tool { public static class IIMapper extends Mapper<LongWritable,Text, Text, Text> { Text file = new Text(); @Override protected void map(LongWritable key, Text value, Context context){ // ExceptionConsumer为自定义捕获异常类型,可用trycatch代替 Collections.list(new StringTokenizer(value.toString()," ")).stream().map(s -> ((String)s).trim()) .filter(s -> s.length() > 1).forEach(ExceptionConsumer.of(name -> context.write(new Text(name),file))); } // setup在map前就运行了 @Override protected void setup(Context context){ String name = ((FileSplit) context.getInputSplit()).getPath().getName(); file.set(name); } } public static class IIReducer extends Reducer<Text,Text,Text,Text> { @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { // StreamSupport.stream(values.spliterator(), false)是将Iterable类型转换为stream String str = StreamSupport.stream(values.spliterator(), false) .collect(Collectors.groupingBy(Text::toString, Collectors.counting())).entrySet().stream() .map(en -> en.getKey() + ":" + en.getValue()).collect(Collectors.joining(" ")); context.write(key,new Text(str)); } } @Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Job job = Job.getInstance(conf, "invert_index_xj"); job.setJarByClass(InvertIndexMR.class); job.setMapperClass(IIMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setReducerClass(IIReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); TextInputFormat.addInputPath(job,new Path(conf.get("inpath"))); TextOutputFormat.setOutputPath(job,new Path(conf.get("outpath"))); job.setNumReduceTasks(1); return job.waitForCompletion(true)? 
0 : 1; } public static void main(String[] args) throws Exception{ ToolRunner.run(new InvertIndexMR(),args); } }
|
共现矩阵-ConcurrenceMR
问题描述:
求出两两共同好友出现的次数。例如,甲好友列表有1和2,乙好友列表也有1和2,那么1和2共现的次数为2,共现次数越大,说明两者关联的可能性越大。
思路分析:
第一步,先输出每个人的所有好友。第二步,map阶段循环每个人的好友两两组合的结果并排序,将所有的两两组合分别作为key,value置为1输出,reduce阶段直接统计相同key的个数即为两两共同好友数。
第一步:FlatFriendsMR
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
| public class FlatFriendsMR extends Configured implements Tool{ static class FFMapper extends Mapper<LongWritable,Text, Text, Text> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { Stream.of(value.toString()).filter(s->s.length()>1).map(line->line.split(",")) .filter(arr->arr.length==2).forEach(ExceptionConsumer.of(arr->context .write(new Text(arr[0].trim()),new Text(arr[1].trim())))); } } static class FFReducer extends Reducer<Text,Text,Text,Text> { @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { String fs = StreamSupport.stream(values.spliterator(), false).map(s -> s.toString()) .collect(Collectors.joining(",")); context.write(key,new Text(fs)); } } @Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Job job = Job.getInstance(conf,"flat_friends_xj"); job.setJarByClass(this.getClass()); job.setMapperClass(FFMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setReducerClass(FFReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); TextInputFormat.addInputPath(job,new Path(conf.get("inpath"))); TextOutputFormat.setOutputPath(job,new Path(conf.get("outpath"))); job.setNumReduceTasks(1); return job.waitForCompletion(true)? 0 : 1; } public static void main(String[] args) throws Exception{ ToolRunner.run(new FlatFriendsMR(),args); } }
|
第二步:ConcurrenceMR
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
| public class ConcurrenceMR extends Configured implements Tool{ static class CCMapper extends Mapper<LongWritable,Text, Text, IntWritable> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String s = value.toString(); String[] arr = s.split("\t"); String[] names = arr[1].split(","); // 将所有好友两两组合输出 for (int i = 0; i < names.length-1; i++){ for (int j = i+1; j < names.length; j++){ String first = names[i]; String second = names[j]; String pair = getPair(first,second); context.write(new Text(pair),new IntWritable(1)); } } } /** * 排序,防止key重复 * @param first * @param second * @return */ public String getPair(String first,String second){ if(first.compareTo(second) > 0){ return second+","+first; }else{ return first+","+second; } } } static class CCReducer extends Reducer<Text,Text,Text,IntWritable> { @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { // 将好友组合两两相同的累加 long count = StreamSupport.stream(values.spliterator(), false).count(); context.write(key,new IntWritable((int)count)); } } @Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Job job = Job.getInstance(conf,"concurrence_xj"); job.setJarByClass(this.getClass()); job.setMapperClass(CCMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(CCReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); TextInputFormat.addInputPath(job,new Path(conf.get("inpath"))); TextOutputFormat.setOutputPath(job,new Path(conf.get("outpath"))); job.setNumReduceTasks(1); return job.waitForCompletion(true)? 0 : 1; } public static void main(String[] args) throws Exception{ ToolRunner.run(new ConcurrenceMR(),args); } }
|
MapReduce排序
局部排序-PartitionSortMR
问题描述:
将所有数据根据气温排序,每个分区之间不存在排序关系,仅在各个区内部进行排序
思路分析:
默认排序方式,只需要将key设置为温度即可
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
| public class PartitionSortMR extends Configured implements Tool { public static class PSMapper extends Mapper<LongWritable, Text, DoubleWritable, Text>{ // 将气温作为key,整体作为value @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] ss = line.split("\t"); String tmp = ss[2]; context.write(new DoubleWritable(Double.parseDouble(tmp)),value); } } @Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Job job = Job.getInstance(conf,"part_sort_xj"); job.setJarByClass(this.getClass()); job.setMapperClass(PSMapper.class); job.setMapOutputKeyClass(DoubleWritable.class); job.setMapOutputValueClass(Text.class); job.setReducerClass(Reducer.class); job.setOutputKeyClass(DoubleWritable.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); TextInputFormat.addInputPath(job,new Path(conf.get("inpath"))); TextOutputFormat.setOutputPath(job,new Path(conf.get("outpath"))); //-D mapreduce.job.reduces job.setNumReduceTasks(5); return job.waitForCompletion(true)? 0 : 1; } public static void main(String[] args) throws Exception { ToolRunner.run(new PartitionSortMR(),args); } }
|
全局排序-TotalSortMR
问题描述:
将所有数据根据气温排序,每个分区之间也存在排序关系
思路分析:
设置成根据样本分区排序,这样的话必须保证样本的泛型前后一致,故无法使用默认的输入格式,可以修改InputFormat或者使用sequencefile,因为sequencefile可以保存数据类型,案例中使用这种方法,先将数据转化为sequencefile,然后直接从sequencefile读取数据进行分区排序。
OutSequenceMR
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
| public class OutSequenceMR extends Configured implements Tool { public static class OSMapper extends Mapper<LongWritable, Text, DoubleWritable, Text>{ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] ss = line.split("\t"); String tmp = ss[2]; context.write(new DoubleWritable(Double.parseDouble(tmp)),value); } } @Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Job job = Job.getInstance(conf, "out_sequence_xj"); job.setJarByClass(this.getClass()); job.setMapperClass(OSMapper.class); job.setMapOutputKeyClass(DoubleWritable.class); job.setMapOutputValueClass(Text.class); job.setReducerClass(Reducer.class); job.setOutputKeyClass(DoubleWritable.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); TextInputFormat.addInputPath(job,new Path(conf.get("inpath"))); SequenceFileOutputFormat.setOutputPath(job,new Path(conf.get("outpath"))); //-D mapreduce.job.reduces //job.setNumReduceTasks(1); return job.waitForCompletion(true)? 0 : 1; } public static void main(String[] args) throws Exception { ToolRunner.run(new OutSequenceMR(),args); } }
|
TotalSortMR
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
| public class TotalSortMR extends Configured implements Tool { public static class TSMapper extends Mapper<DoubleWritable, Text, DoubleWritable, Text>{ @Override protected void map(DoubleWritable key, Text value, Context context) throws IOException, InterruptedException { context.write(key, value); } } @Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Job job = Job.getInstance(conf, "total_sort_xj"); job.setJarByClass(this.getClass()); job.setMapperClass(TSMapper.class); job.setMapOutputKeyClass(DoubleWritable.class); job.setMapOutputValueClass(Text.class); job.setReducerClass(Reducer.class); job.setOutputKeyClass(DoubleWritable.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); SequenceFileInputFormat.addInputPath(job,new Path(conf.get("inpath"))); TextOutputFormat.setOutputPath(job,new Path(conf.get("outpath"))); // 设置成根据样本分区排序 job.setPartitionerClass(TotalOrderPartitioner.class); // 获取随机样本 // 0.8表示,数量少的话,随机取80%的数据作为样本 // 1000表示,数量很多的话,随机取1000个数据作为样本 // 10表示,最大支持10个分区 InputSampler.RandomSampler<DoubleWritable,Text> sam = new InputSampler.RandomSampler(0.8,1000,10); //把采样结果传递给job InputSampler.writePartitionFile(job,sam); String file = TotalOrderPartitioner.getPartitionFile(job.getConfiguration()); job.addCacheFile(URI.create(file)); // job.setNumReduceTasks(5); return job.waitForCompletion(true)? 0 : 1; } public static void main(String[] args) throws Exception { ToolRunner.run(new TotalSortMR(),args); } }
|
二次排序-SecondarySortMR
问题描述:
将所有数据先根据年份升序排列,再根据气温降序排列
思路分析:
要进行二次排序,必须要创建一个复合类型作为key来进行排序比较,这个复合类型实现WritableComparable接口,包含年份和气温两个属性,重写compareTo()方法,按年份升序,按气温降序。除此之外,要实现二次排序必须保证相同年份的被分到同一个分区,这样才可以比较气温。因此,还需要定义一个类来继承Partitioner抽象类,重写getPartition()方法,使分区根据年份来划分。另外,还需手动设置根据年份进行分组,故还需要创建一个类继承WritableComparator类,重写compare()方法,将相同年份的分为同一组。最后,在主类中将复合类型作为map的key的输出类型,完成排序,并在job上设置自定义的分区规则和分组规则。
YearTmp(复合类型)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
| public class YearTmp implements WritableComparable<YearTmp> { private IntWritable year = new IntWritable(); // 年份 private DoubleWritable tmp = new DoubleWritable(); // 平均温度 public YearTmp() { } public YearTmp(IntWritable year, DoubleWritable tmp) { this.year = new IntWritable(year.get()); this.tmp = new DoubleWritable(tmp.get()); } public YearTmp(int year, double tmp) { this.year = new IntWritable(year); this.tmp = new DoubleWritable(tmp); } public IntWritable getYear() { return year; } public void setYear(IntWritable year) { this.year = new IntWritable(year.get()); } public DoubleWritable getTmp() { return tmp; } public void setTmp(DoubleWritable tmp) { this.tmp = new DoubleWritable(tmp.get()); } // 第二步,排序,年份升序,温度降序 @Override public int compareTo(YearTmp o) { return this.year.compareTo(o.year)==0 ? o.tmp.compareTo(this.tmp): this.year.compareTo(o.year); } @Override public void write(DataOutput dataOutput) throws IOException { year.write(dataOutput); tmp.write(dataOutput); } @Override public void readFields(DataInput dataInput) throws IOException { year.readFields(dataInput); tmp.readFields(dataInput); } }
|
YearPartitioner(自定义分区规则)
1 2 3 4 5 6 7 8
| public class YearPartitioner extends Partitioner<YearTmp, Text> { public YearPartitioner() { } @Override public int getPartition(YearTmp o,Text o2, int i) { return o.getYear().get()%i; } }
|
YearGroupComparator(自定义分组规则)
1 2 3 4 5 6 7 8 9 10 11 12
| public class YearGroupComparator extends WritableComparator { public YearGroupComparator() { super(YearTmp.class,true); } @Override public int compare(WritableComparable a, WritableComparable b) { YearTmp y1 = (YearTmp)a; YearTmp y2 = (YearTmp)b; return y1.getYear().compareTo(y2.getYear()); } }
|
SecondarySortMR(MR主程序)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
| public class SecondarySortMR extends Configured implements Tool { public static class SSMapper extends Mapper<LongWritable, Text, YearTmp,Text>{ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] infos = line.split("\t"); YearTmp yt = new YearTmp(Integer.parseInt(infos[0]), Double.parseDouble(infos[2])); context.write(yt,new Text(infos[1])); } } public static class SSReducer extends Reducer<YearTmp,Text,Text,Text>{ @Override protected void reduce(YearTmp key, Iterable<Text> values, Context context) throws IOException, InterruptedException { for (Text value : values) { String str = key.getYear() + "\t" + key.getTmp(); context.write(new Text(str),value); } } } @Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Job job = Job.getInstance(conf, "secondary_sort_xj"); job.setJarByClass(this.getClass()); job.setMapperClass(SSMapper.class); job.setMapOutputKeyClass(YearTmp.class); job.setMapOutputValueClass(Text.class); job.setReducerClass(SSReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); TextInputFormat.addInputPath(job,new Path(conf.get("inpath"))); TextOutputFormat.setOutputPath(job,new Path(conf.get("outpath"))); // 设置分区规则 job.setPartitionerClass(YearPartitioner.class); // 设置分组规则 job.setGroupingComparatorClass(YearGroupComparator.class); return job.waitForCompletion(true)? 0 : 1; } public static void main(String[] args) throws Exception { ToolRunner.run(new SecondarySortMR(),args); } }
|