0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 """
0019 Logistic regression using MLlib.
0020
0021 This example requires NumPy (http://www.numpy.org/).
0022 """
0023 from __future__ import print_function
0024
0025 import sys
0026
0027 from pyspark import SparkContext
0028 from pyspark.mllib.regression import LabeledPoint
0029 from pyspark.mllib.classification import LogisticRegressionWithSGD
0030
0031
def parsePoint(line):
    """
    Parse one line of whitespace-separated numbers into a LabeledPoint.

    The first value is the label, the remaining values are the features.
    A label of -1 is remapped to 0 because MLlib's binary logistic
    regression expects class labels in {0, 1}.

    :param line: one record of the input text file
    :return: LabeledPoint(label, features)
    """
    # str.split() with no argument tolerates runs of spaces, tabs and
    # leading/trailing whitespace, unlike split(' ').
    values = [float(s) for s in line.split()]
    if values[0] == -1:  # remap -1 label to 0 for {0, 1} convention
        values[0] = 0
    return LabeledPoint(values[0], values[1:])
0040
0041
if __name__ == "__main__":
    # Usage: logistic_regression <file> <iterations>
    if len(sys.argv) != 3:
        print("Usage: logistic_regression <file> <iterations>", file=sys.stderr)
        sys.exit(-1)
    sc = SparkContext(appName="PythonLR")
    # Cache the parsed points: SGD is iterative, and without caching the
    # text file would be re-read and re-parsed on every iteration.
    points = sc.textFile(sys.argv[1]).map(parsePoint).cache()
    iterations = int(sys.argv[2])
    model = LogisticRegressionWithSGD.train(points, iterations)
    print("Final weights: " + str(model.weights))
    print("Final intercept: " + str(model.intercept))
    sc.stop()