def spaces():
    """Separate characters with spaces.

    Reads lines from standard input and writes each one to standard output
    with a single space inserted between every pair of adjacent characters.
    The trailing newline of a line is itself one of the joined characters,
    so a space also appears immediately before it.
    """
    import sys

    for line in sys.stdin:
        sys.stdout.write(' '.join(line))


def spark_lines():
    """Find the lines of shakespeare last in the alphabet.

    Run with spark-submit. Prints the first two lines of 'shakespeare.txt'
    in descending sort order (fewer if the file has fewer than two lines).

    E.g., cat shakespeare.txt | sort -r | head -n2
    """
    from pyspark import SparkContext

    sc = SparkContext()
    try:
        lines = sc.textFile('shakespeare.txt')
        # Iterate instead of unpacking into exactly two names so that a
        # file with fewer than two lines does not raise ValueError.
        for line in lines.sortBy(lambda s: s, False).take(2):
            print(line)
    finally:
        # Release the context so another one can be created in this process.
        sc.stop()


def vowels(line):
    """Yield (vowel, count) pairs.

    Vowels are the lowercase characters 'a', 'e', 'i', 'o', 'u', visited in
    that order. A vowel that does not occur in ``line`` yields nothing.
    """
    for v in 'aeiou':
        occurrences = line.count(v)
        if occurrences:
            yield (v, occurrences)


def count_with_spark():
    """Count vowels in a text file.

    Run with spark-submit. Reads 'shakespeare.txt', sums the per-line
    (vowel, count) pairs produced by ``vowels`` for each vowel, and prints
    the resulting list of (vowel, total) pairs sorted by vowel.
    """
    from operator import add

    from pyspark import SparkContext

    sc = SparkContext(appName="VowelCount")
    try:
        lines = sc.textFile('shakespeare.txt')
        # The result must not be bound to the name ``vowels``: assigning to
        # that name would make it local to this function and the reference
        # to the module-level ``vowels`` generator would raise
        # UnboundLocalError.
        counts = lines.flatMap(vowels).reduceByKey(add).sortByKey().collect()
        print(counts)
    finally:
        # Release the context so another one can be created in this process.
        sc.stop()