Back to home page

OSCL-LXR

 
 

    


0001 #
0002 # Licensed to the Apache Software Foundation (ASF) under one or more
0003 # contributor license agreements.  See the NOTICE file distributed with
0004 # this work for additional information regarding copyright ownership.
0005 # The ASF licenses this file to You under the Apache License, Version 2.0
0006 # (the "License"); you may not use this file except in compliance with
0007 # the License.  You may obtain a copy of the License at
0008 #
0009 #    http://www.apache.org/licenses/LICENSE-2.0
0010 #
0011 # Unless required by applicable law or agreed to in writing, software
0012 # distributed under the License is distributed on an "AS IS" BASIS,
0013 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014 # See the License for the specific language governing permissions and
0015 # limitations under the License.
0016 #
0017 
0018 """
0019 Randomly generated RDDs.
0020 """
0021 from __future__ import print_function
0022 
0023 import sys
0024 
0025 from pyspark import SparkContext
0026 from pyspark.mllib.random import RandomRDDs
0027 
0028 
if __name__ == "__main__":
    # This example takes no arguments. The check below also lets a single
    # extra argument through, but nothing after this point reads sys.argv.
    if len(sys.argv) not in [1, 2]:
        print("Usage: random_rdd_generation", file=sys.stderr)
        sys.exit(-1)

    sc = SparkContext(appName="PythonRandomRDDGeneration")

    # Run the examples inside try/finally so the SparkContext is stopped even
    # when one of the Spark actions (count/take) raises.
    try:
        numExamples = 10000  # number of examples to generate

        # Example: RandomRDDs.normalRDD
        # Builds an RDD with numExamples entries and prints its size followed
        # by the first five values.
        normalRDD = RandomRDDs.normalRDD(sc, numExamples)
        print('Generated RDD of %d examples sampled from the standard normal distribution'
              % normalRDD.count())
        print('  First 5 samples:')
        for sample in normalRDD.take(5):
            print('    ' + str(sample))
        print()

        # Example: RandomRDDs.normalVectorRDD
        # Same as above, but each of the numExamples rows is a vector with
        # two columns.
        normalVectorRDD = RandomRDDs.normalVectorRDD(sc, numRows=numExamples, numCols=2)
        print('Generated RDD of %d examples of length-2 vectors.' % normalVectorRDD.count())
        print('  First 5 samples:')
        for sample in normalVectorRDD.take(5):
            print('    ' + str(sample))
        print()
    finally:
        sc.stop()