Could someone walk me through this in detail? I don't really understand it.

package com.zkpk.us;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Map phase: each input record is one line of the text file; the key is the
// byte offset of the line, the value is the line itself.
class UserMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable ONE = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws java.io.IOException, InterruptedException {
        // Split the line on tab characters; only lines with exactly
        // six columns are counted, everything else is skipped.
        String[] columns = value.toString().split("\t");
        if (columns.length == 6) {
            // Column 1 holds the user id; emit (uid, 1) for each valid line.
            Text uid = new Text(columns[1]);
            context.write(uid, ONE);
        }
    }
}

// Reduce phase: all the 1s emitted for the same uid arrive together;
// summing them gives the number of records for that uid.
class UserReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws java.io.IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }
}

// Driver: wires the mapper and reducer into a job and submits it.
public class UserCount {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance replaces the new Job(conf, name) constructor,
        // which is deprecated as of Hadoop 2.x.
        Job job = Job.getInstance(conf, "UserUid");
        job.setJarByClass(UserCount.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setMapperClass(UserMapper.class);
        job.setReducerClass(UserReducer.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));    // HDFS input directory
        FileOutputFormat.setOutputPath(job, new Path(args[1]));  // HDFS output directory (must not already exist)
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}


Youruncle
Views: 1345

3 Answers

gyf451

This is Hadoop's WordCount program: it classifies and counts the words (here, the user IDs) in the documents under the input directory. In the Hadoop output directory you can see how many times each one appears.
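A minimal illustration of what the job does. The six-column tab-separated input format is inferred from the mapper's split logic; the field values themselves are made up. Only column 1 (the uid) matters:

Input (each line has 6 tab-separated columns):
    20111230    u001    foo    1    1    http://a.example
    20111230    u002    bar    2    1    http://b.example
    20111231    u001    baz    1    2    http://c.example

Output (uid, count):
    u001    2
    u002    1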

赵强老师

This is a MapReduce program, the core of how Hadoop processes data. It consists of three parts. Part 1: Map, which splits each line of input into fields, using the tab character as the delimiter. Part 2: Reduce, which aggregates the Map output to produce the final result. Part 3: the main program, which assembles Map and Reduce into a single job and runs it; both the input and the output data live on HDFS. If you have more questions, feel free to ask me. :)
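For context, a job like this is typically packaged into a jar and submitted from the command line. The jar name and HDFS paths below are assumptions for illustration, not from the original post:

    hadoop jar usercount.jar com.zkpk.us.UserCount /user/zkpk/input /user/zkpk/output
    hdfs dfs -cat /user/zkpk/output/part-r-00000

The first command submits the job with args[0] and args[1] as the input and output directories; the second prints the reducer's output file.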

_白驹过隙_

Just say what your actual question is. Don't just dump code, I don't want to read it.