本文共 2884 字,大约阅读时间需要 9 分钟。
这里简单地用一张图介绍 MapReduce WordCount 的执行流程,各个步骤都很详细。
Mapper
package wordconut;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * WordCount mapper: splits each input line on single spaces and emits a
 * (word, 1) pair per token.
 *
 * <p>Fix: the original extended the raw {@code Mapper} type, so its
 * {@code map(LongWritable, Text, Context)} did not override the framework's
 * {@code map(Object, Object, Context)} and would be rejected by
 * {@code @Override}. The generic parameters below restore the contract.
 *
 * @author ganxiang
 */
public class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Writables are mutable; reuse them across map() calls instead of
    // allocating a new object per emitted word.
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    /**
     * @param key1    byte offset of the line within the input split (unused)
     * @param value1  one line of input text
     * @param context sink for (word, 1) pairs
     */
    @Override
    protected void map(LongWritable key1, Text value1, Context context)
            throws IOException, InterruptedException {
        // Split on a single space, matching the original behavior
        // (consecutive spaces therefore produce empty tokens).
        for (String token : value1.toString().split(" ")) {
            word.set(token);
            context.write(word, ONE);
        }
    }
}
Reducer
package wordconut;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * WordCount reducer: sums the 1-counts collected for each word and emits
 * (word, total).
 *
 * <p>Fix: the original extended the raw {@code Reducer} type and declared a
 * raw {@code Iterable}, so {@code @Override reduce(Text, Iterable, Context)}
 * did not match the framework's erased {@code reduce(Object, Iterable,
 * Context)} signature. The generic parameters below restore the contract.
 *
 * @author ganxiang
 */
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Reused output value to avoid one allocation per key.
    private final IntWritable total = new IntWritable();

    /**
     * @param key3    the word
     * @param values3 the per-occurrence counts (all 1 in plain WordCount)
     * @param context sink for (word, total) pairs
     */
    @Override
    protected void reduce(Text key3, Iterable<IntWritable> values3, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable count : values3) {
            sum += count.get();
        }
        total.set(sum);
        // key3 can be written directly; the original's new Text(key3) copy
        // was an unnecessary allocation.
        context.write(key3, total);
    }
}
Job
package wordconut;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * WordCount driver: wires up the mapper/reducer, input and output paths,
 * and submits the job to the cluster.
 *
 * <p>Usage: {@code WcJob <input path> <output path>}
 *
 * <p>Fix: the original ignored the boolean returned by
 * {@code waitForCompletion}, so the JVM exited 0 even when the job failed;
 * it also indexed {@code args} without checking its length.
 *
 * @author ganxiang
 */
public class WcJob {

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        // Fail fast with a usage message instead of an
        // ArrayIndexOutOfBoundsException on missing arguments.
        if (args.length < 2) {
            System.err.println("Usage: WcJob <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job wcJob = Job.getInstance(conf);

        // 1. jar that contains the job classes
        wcJob.setJarByClass(WcJob.class);

        // 2. mapper and its intermediate output types
        wcJob.setMapperClass(WcMapper.class);
        wcJob.setMapOutputKeyClass(Text.class);
        wcJob.setMapOutputValueClass(IntWritable.class);

        // 3. reducer; summation is associative and commutative, so the
        // reducer doubles as a combiner to cut shuffle traffic
        wcJob.setCombinerClass(WCReducer.class);
        wcJob.setReducerClass(WCReducer.class);

        // 4. final output types
        wcJob.setOutputKeyClass(Text.class);
        wcJob.setOutputValueClass(IntWritable.class);

        // 5. input/output locations from the command line
        FileInputFormat.setInputPaths(wcJob, new Path(args[0]));
        FileOutputFormat.setOutputPath(wcJob, new Path(args[1]));

        // 6. submit and propagate job success/failure as the exit code
        System.exit(wcJob.waitForCompletion(true) ? 0 : 1);
    }
}
转载地址:http://neqzi.baihongyu.com/