#!/bin/python
from pyspark import SparkConf, SparkContext
if __name__ == "__main__":
    # Word-frequency job: count every space-separated token in the input
    # and print the five most frequent (word, count) pairs.
    conf = SparkConf().setMaster("local").setAppName("Challenge")
    sc = SparkContext(conf=conf)
    try:
        sc.setLogLevel("ERROR")
        lines = sc.textFile("/home/shiyanlou/data/")
        # NOTE(review): split(' ') yields empty tokens for consecutive
        # spaces; kept as-is to preserve the original tokenization.
        words = lines.flatMap(lambda line: line.split(' '))
        counts = words.map(lambda word: (word, 1)).reduceByKey(lambda a, b: a + b)
        # Each element is a (word, count) tuple, so the sort key is the
        # count at index 1; descending order puts the most frequent first.
        top5 = counts.sortBy(lambda pair: pair[1], ascending=False).take(5)
        for x in top5:
            print(x)
    finally:
        # Always release the SparkContext, even if the job fails.
        sc.stop()
想要统计词频最高的前五个单词,请问 `top5 = counts.sortBy(xxxxx, ascending = False).take(5)` 这行代码中的参数 xxxxx 应该如何修改?
噜噜哒
哈士奇WWW
随时随地看视频慕课网APP