6. RDD Operators in Practice

The first example is the classic word count: split each line of an input file into words, turn every word into a (word, 1) pair, and sum the counts per word with reduceByKey.

from pyspark import SparkContext, SparkConf
import sys

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: wordcount <input>", file=sys.stderr)
        sys.exit(-1)

    conf = SparkConf()
    sc = SparkContext(conf=conf)

    counts = (sc.textFile(sys.argv[1])
              .flatMap(lambda line: line.split(" "))   # split each line into words
              .map(lambda x: (x, 1))                   # pair every word with a count of 1
              .reduceByKey(lambda a, b: a + b))        # sum the counts per word

    output = counts.collect()
    for (word, count) in output:
        print("%s: %i" % (word, count))

    sc.stop()
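
Each operator in that chain can also be tried on a small in-memory RDD built with parallelize, which makes the intermediate results easy to inspect. The sketch below is illustrative only; the sample lines and the app name are placeholders, not part of the original script:

from pyspark import SparkContext

sc = SparkContext(appName="wordcount-sketch")  # illustrative app name

lines = sc.parallelize(["hello spark", "hello world"])

words = lines.flatMap(lambda line: line.split(" "))  # hello, spark, hello, world
pairs = words.map(lambda w: (w, 1))                  # (hello,1), (spark,1), (hello,1), (world,1)
counts = pairs.reduceByKey(lambda a, b: a + b)       # (hello,2), (spark,1), (world,1) -- order not guaranteed

print(counts.collect())
sc.stop()

The full script above is launched with spark-submit, passing the input path as its single command-line argument.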

The second example computes an average age. Each line of the input file is expected to carry the age as its second space-separated field; the script sums the ages with reduce, counts the records with count, and divides one by the other.

from pyspark import SparkContext, SparkConf
import sys

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: avg <input>", file=sys.stderr)
        sys.exit(-1)

    conf = SparkConf()
    sc = SparkContext(conf=conf)

    # the age is the second space-separated field of each input line
    ageData = sc.textFile(sys.argv[1]).map(lambda line: line.split(" ")[1])
    totalAge = ageData.map(lambda x: int(x)).reduce(lambda a, b: a + b)  # sum of all ages
    count = ageData.count()                                              # number of records
    avgAge = totalAge / count

    print("totalAge:%s" % totalAge)
    print("count:%s" % count)
    print("avgAge:%s" % avgAge)

    sc.stop()
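
Because reduce and count are two separate actions, this script runs two jobs over ageData (and re-reads the input unless the RDD is cached). A minimal alternative sketch, assuming the same numeric ages, uses the aggregate operator to obtain the sum and the count in a single pass; the sample ages and the app name below are placeholders only:

from pyspark import SparkContext

sc = SparkContext(appName="avg-sketch")  # illustrative app name

ages = sc.parallelize([24, 30, 27])  # stands in for ageData.map(lambda x: int(x))

# the accumulator is a (running sum, running count) pair
totalAge, count = ages.aggregate(
    (0, 0),
    lambda acc, x: (acc[0] + x, acc[1] + 1),  # fold one age into a partition's accumulator
    lambda a, b: (a[0] + b[0], a[1] + b[1]),  # merge accumulators across partitions
)

print("avgAge:%s" % (totalAge / count))
sc.stop()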
 
 

Reposted from www.cnblogs.com/huangguoming/p/11038005.html