老师能帮忙看下这个是怎么回事吗，谢谢-慕课网

老师能帮忙看下这个是怎么回事吗，谢谢

来源：3-2 MapReduce实战

慕仰2907144

2019-12-06 22:22

sudo /opt/hadoop/hadoop-2.10.0/bin/hadoop jar /opt/hadoop/hadoop-2.10.0/share/hadoop/tools/lib/hadoop-streaming-2.10.0.jar -files "hdfs_map.py,hdfs_reduce.py" -input /input/student.txt -output /tmp/wordcounttest -mapper "/root/anaconda3/bin/python hdfs_map.py" -reducer "/root/anaconda3/bin/python hdfs_reduce.py"

#!/opt/anaconda3/bin/python
#-*- coding:utf-8 -*-
import sys
def read_input(file):
    """Yield the whitespace-separated tokens of each line in *file*.

    Args:
        file: Any iterable of text lines (``main`` passes ``sys.stdin``).

    Yields:
        A list of tokens for every input line; a blank line yields ``[]``.
    """
    for line in file:
        # split() with no argument splits on any run of whitespace, which
        # also discards the trailing newline.
        yield line.split()
def main():
    """Print one ``word<TAB>1`` line on stdout for every token read from stdin."""
    data = read_input(sys.stdin)
    for words in data:
        for word in words:
            # Tab-separated key/value pair; the count is always 1 here.
            print("%s%s%d" % (word, '\t', 1))


if __name__ == '__main__':
    main()

#!/opt/anaconda3/bin/python
# -*- coding:utf-8 -*-
import sys
from itertools import groupby
from operator import itemgetter
def read_mapper_output(file, separator='\t'):
    """Yield each line of *file* split into a key part and a value part.

    Args:
        file: Any iterable of text lines.
        separator: Delimiter between key and value; defaults to a tab.

    Yields:
        The list obtained by right-stripping the line and splitting it at
        the first occurrence of *separator* (so at most two items).
    """
    for line in file:
        # maxsplit=1 keeps any further separators inside the value part.
        yield line.rstrip().split(separator, 1)
def main():
    """Sum the counts of each run of equal words on stdin and print the totals.

    Reads ``word<TAB>count`` records through ``read_mapper_output`` and
    prints one ``word<TAB>total`` line per group.
    """
    data = read_mapper_output(sys.stdin)
    # groupby only merges *adjacent* records with the same key, so the
    # totals are per-word only if the input arrives grouped by word.
    for current_word, group in groupby(data, itemgetter(0)):
        # Use a separate name for the key inside the generator so it does
        # not shadow the loop variable printed below.
        total_count = sum(int(count) for _word, count in group)
        print("%s%s%d" % (current_word, '\t', total_count))


if __name__ == '__main__':
    main()

写回答关注