0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 """
0019 Randomly generated RDDs.
0020 """
0021 from __future__ import print_function
0022
0023 import sys
0024
0025 from pyspark import SparkContext
0026 from pyspark.mllib.random import RandomRDDs
0027
0028
if __name__ == "__main__":
    # The script takes no meaningful arguments: zero or one extra argument is
    # tolerated (and ignored); anything else prints the usage line and exits.
    if len(sys.argv) not in [1, 2]:
        print("Usage: random_rdd_generation", file=sys.stderr)
        sys.exit(-1)

    sc = SparkContext(appName="PythonRandomRDDGeneration")

    # Run the examples under try/finally so the SparkContext is stopped even
    # when one of the jobs below raises.
    try:
        numExamples = 10000  # number of examples requested for each generated RDD

        # Example: RandomRDDs.normalRDD -- an RDD of scalar samples.
        normalRDD = RandomRDDs.normalRDD(sc, numExamples)
        print('Generated RDD of %d examples sampled from the standard normal distribution'
              % normalRDD.count())
        print(' First 5 samples:')
        for sample in normalRDD.take(5):
            print(' ' + str(sample))
        print()

        # Example: RandomRDDs.normalVectorRDD -- an RDD of vectors with 2 columns.
        normalVectorRDD = RandomRDDs.normalVectorRDD(sc, numRows=numExamples, numCols=2)
        print('Generated RDD of %d examples of length-2 vectors.' % normalVectorRDD.count())
        print(' First 5 samples:')
        for sample in normalVectorRDD.take(5):
            print(' ' + str(sample))
        print()
    finally:
        sc.stop()