0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020 from pyspark.ml.param import *
0021
0022
class HasMaxIter(Params):
    """Mixin that adds the ``maxIter`` param: max number of iterations (>= 0)."""

    maxIter = Param(
        Params._dummy(),
        "maxIter",
        "max number of iterations (>= 0).",
        typeConverter=TypeConverters.toInt,
    )

    def __init__(self):
        super(HasMaxIter, self).__init__()

    def getMaxIter(self):
        """Return the value of ``maxIter``, or its default if unset."""
        return self.getOrDefault(self.maxIter)
0038
0039
class HasRegParam(Params):
    """Mixin that adds the ``regParam`` param: regularization parameter (>= 0)."""

    regParam = Param(
        Params._dummy(),
        "regParam",
        "regularization parameter (>= 0).",
        typeConverter=TypeConverters.toFloat,
    )

    def __init__(self):
        super(HasRegParam, self).__init__()

    def getRegParam(self):
        """Return the value of ``regParam``, or its default if unset."""
        return self.getOrDefault(self.regParam)
0055
0056
class HasFeaturesCol(Params):
    """Mixin that adds the ``featuresCol`` param: features column name (default: 'features')."""

    featuresCol = Param(
        Params._dummy(),
        "featuresCol",
        "features column name.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasFeaturesCol, self).__init__()
        self._setDefault(featuresCol='features')

    def getFeaturesCol(self):
        """Return the value of ``featuresCol``, or its default if unset."""
        return self.getOrDefault(self.featuresCol)
0073
0074
class HasLabelCol(Params):
    """Mixin that adds the ``labelCol`` param: label column name (default: 'label')."""

    labelCol = Param(
        Params._dummy(),
        "labelCol",
        "label column name.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasLabelCol, self).__init__()
        self._setDefault(labelCol='label')

    def getLabelCol(self):
        """Return the value of ``labelCol``, or its default if unset."""
        return self.getOrDefault(self.labelCol)
0091
0092
class HasPredictionCol(Params):
    """Mixin that adds the ``predictionCol`` param: prediction column name (default: 'prediction')."""

    predictionCol = Param(
        Params._dummy(),
        "predictionCol",
        "prediction column name.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasPredictionCol, self).__init__()
        self._setDefault(predictionCol='prediction')

    def getPredictionCol(self):
        """Return the value of ``predictionCol``, or its default if unset."""
        return self.getOrDefault(self.predictionCol)
0109
0110
class HasProbabilityCol(Params):
    """Mixin that adds the ``probabilityCol`` param (default: 'probability').

    Column for predicted class conditional probabilities; see the param doc
    for the caveat about calibration.
    """

    probabilityCol = Param(
        Params._dummy(),
        "probabilityCol",
        "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasProbabilityCol, self).__init__()
        self._setDefault(probabilityCol='probability')

    def getProbabilityCol(self):
        """Return the value of ``probabilityCol``, or its default if unset."""
        return self.getOrDefault(self.probabilityCol)
0127
0128
class HasRawPredictionCol(Params):
    """Mixin that adds the ``rawPredictionCol`` param: raw prediction column name (default: 'rawPrediction')."""

    rawPredictionCol = Param(
        Params._dummy(),
        "rawPredictionCol",
        "raw prediction (a.k.a. confidence) column name.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasRawPredictionCol, self).__init__()
        self._setDefault(rawPredictionCol='rawPrediction')

    def getRawPredictionCol(self):
        """Return the value of ``rawPredictionCol``, or its default if unset."""
        return self.getOrDefault(self.rawPredictionCol)
0145
0146
class HasInputCol(Params):
    """Mixin that adds the ``inputCol`` param: input column name (no default)."""

    inputCol = Param(
        Params._dummy(),
        "inputCol",
        "input column name.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasInputCol, self).__init__()

    def getInputCol(self):
        """Return the value of ``inputCol``, or its default if unset."""
        return self.getOrDefault(self.inputCol)
0162
0163
class HasInputCols(Params):
    """Mixin that adds the ``inputCols`` param: input column names (no default)."""

    inputCols = Param(
        Params._dummy(),
        "inputCols",
        "input column names.",
        typeConverter=TypeConverters.toListString,
    )

    def __init__(self):
        super(HasInputCols, self).__init__()

    def getInputCols(self):
        """Return the value of ``inputCols``, or its default if unset."""
        return self.getOrDefault(self.inputCols)
0179
0180
class HasOutputCol(Params):
    """Mixin that adds the ``outputCol`` param: output column name.

    The default is derived from the instance ``uid`` so that distinct
    stages never collide on the same output column by default.
    """

    outputCol = Param(
        Params._dummy(),
        "outputCol",
        "output column name.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasOutputCol, self).__init__()
        # Per-instance default: "<uid>__output".
        self._setDefault(outputCol=self.uid + '__output')

    def getOutputCol(self):
        """Return the value of ``outputCol``, or its default if unset."""
        return self.getOrDefault(self.outputCol)
0197
0198
class HasOutputCols(Params):
    """Mixin that adds the ``outputCols`` param: output column names (no default)."""

    outputCols = Param(
        Params._dummy(),
        "outputCols",
        "output column names.",
        typeConverter=TypeConverters.toListString,
    )

    def __init__(self):
        super(HasOutputCols, self).__init__()

    def getOutputCols(self):
        """Return the value of ``outputCols``, or its default if unset."""
        return self.getOrDefault(self.outputCols)
0214
0215
class HasNumFeatures(Params):
    """Mixin that adds the ``numFeatures`` param: number of features, > 0 (default: 262144 = 2**18)."""

    numFeatures = Param(
        Params._dummy(),
        "numFeatures",
        "Number of features. Should be greater than 0.",
        typeConverter=TypeConverters.toInt,
    )

    def __init__(self):
        super(HasNumFeatures, self).__init__()
        self._setDefault(numFeatures=262144)

    def getNumFeatures(self):
        """Return the value of ``numFeatures``, or its default if unset."""
        return self.getOrDefault(self.numFeatures)
0232
0233
class HasCheckpointInterval(Params):
    """Mixin that adds the ``checkpointInterval`` param (>= 1, or -1 to disable; no default)."""

    checkpointInterval = Param(
        Params._dummy(),
        "checkpointInterval",
        "set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations. Note: this setting will be ignored if the checkpoint directory is not set in the SparkContext.",
        typeConverter=TypeConverters.toInt,
    )

    def __init__(self):
        super(HasCheckpointInterval, self).__init__()

    def getCheckpointInterval(self):
        """Return the value of ``checkpointInterval``, or its default if unset."""
        return self.getOrDefault(self.checkpointInterval)
0249
0250
class HasSeed(Params):
    """Mixin that adds the ``seed`` param: random seed.

    The default seed is derived deterministically from the concrete class
    name, so two instances of the same class share a default seed while
    different classes get different ones.
    """

    seed = Param(Params._dummy(), "seed", "random seed.", typeConverter=TypeConverters.toInt)

    def __init__(self):
        super(HasSeed, self).__init__()
        # BUG FIX: the original default was ``hash(type(self).__name__)``.
        # Python's str hash is randomized per interpreter run (PYTHONHASHSEED),
        # so the "default" seed silently changed between runs, defeating the
        # point of a class-derived default.  Use a simple polynomial rolling
        # hash instead: deterministic across runs and platforms, no imports,
        # and bounded to a positive 31-bit int (safe for a JVM int/long).
        name = type(self).__name__
        default_seed = 0
        for ch in name:
            default_seed = (default_seed * 31 + ord(ch)) % (1 << 31)
        self._setDefault(seed=default_seed)

    def getSeed(self):
        """Return the value of ``seed``, or its default if unset."""
        return self.getOrDefault(self.seed)
0267
0268
class HasTol(Params):
    """Mixin that adds the ``tol`` param: convergence tolerance for iterative algorithms (>= 0)."""

    tol = Param(
        Params._dummy(),
        "tol",
        "the convergence tolerance for iterative algorithms (>= 0).",
        typeConverter=TypeConverters.toFloat,
    )

    def __init__(self):
        super(HasTol, self).__init__()

    def getTol(self):
        """Return the value of ``tol``, or its default if unset."""
        return self.getOrDefault(self.tol)
0284
0285
class HasRelativeError(Params):
    """Mixin that adds the ``relativeError`` param for approximate quantiles, in [0, 1] (default: 0.001)."""

    relativeError = Param(
        Params._dummy(),
        "relativeError",
        "the relative target precision for the approximate quantile algorithm. Must be in the range [0, 1]",
        typeConverter=TypeConverters.toFloat,
    )

    def __init__(self):
        super(HasRelativeError, self).__init__()
        self._setDefault(relativeError=0.001)

    def getRelativeError(self):
        """Return the value of ``relativeError``, or its default if unset."""
        return self.getOrDefault(self.relativeError)
0302
0303
class HasStepSize(Params):
    """Mixin that adds the ``stepSize`` param: per-iteration optimization step size (>= 0)."""

    stepSize = Param(
        Params._dummy(),
        "stepSize",
        "Step size to be used for each iteration of optimization (>= 0).",
        typeConverter=TypeConverters.toFloat,
    )

    def __init__(self):
        super(HasStepSize, self).__init__()

    def getStepSize(self):
        """Return the value of ``stepSize``, or its default if unset."""
        return self.getOrDefault(self.stepSize)
0319
0320
class HasHandleInvalid(Params):
    """Mixin that adds the ``handleInvalid`` param: policy for invalid entries ('skip' or 'error')."""

    handleInvalid = Param(
        Params._dummy(),
        "handleInvalid",
        "how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an error). More options may be added later.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasHandleInvalid, self).__init__()

    def getHandleInvalid(self):
        """Return the value of ``handleInvalid``, or its default if unset."""
        return self.getOrDefault(self.handleInvalid)
0336
0337
class HasElasticNetParam(Params):
    """Mixin that adds the ``elasticNetParam`` param: ElasticNet mixing in [0, 1] (default: 0.0, pure L2)."""

    elasticNetParam = Param(
        Params._dummy(),
        "elasticNetParam",
        "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.",
        typeConverter=TypeConverters.toFloat,
    )

    def __init__(self):
        super(HasElasticNetParam, self).__init__()
        self._setDefault(elasticNetParam=0.0)

    def getElasticNetParam(self):
        """Return the value of ``elasticNetParam``, or its default if unset."""
        return self.getOrDefault(self.elasticNetParam)
0354
0355
class HasFitIntercept(Params):
    """Mixin that adds the ``fitIntercept`` param: whether to fit an intercept term (default: True)."""

    fitIntercept = Param(
        Params._dummy(),
        "fitIntercept",
        "whether to fit an intercept term.",
        typeConverter=TypeConverters.toBoolean,
    )

    def __init__(self):
        super(HasFitIntercept, self).__init__()
        self._setDefault(fitIntercept=True)

    def getFitIntercept(self):
        """Return the value of ``fitIntercept``, or its default if unset."""
        return self.getOrDefault(self.fitIntercept)
0372
0373
class HasStandardization(Params):
    """Mixin that adds the ``standardization`` param: standardize features before fitting (default: True)."""

    standardization = Param(
        Params._dummy(),
        "standardization",
        "whether to standardize the training features before fitting the model.",
        typeConverter=TypeConverters.toBoolean,
    )

    def __init__(self):
        super(HasStandardization, self).__init__()
        self._setDefault(standardization=True)

    def getStandardization(self):
        """Return the value of ``standardization``, or its default if unset."""
        return self.getOrDefault(self.standardization)
0390
0391
class HasThresholds(Params):
    """Mixin that adds the ``thresholds`` param: per-class probability thresholds (no default)."""

    thresholds = Param(
        Params._dummy(),
        "thresholds",
        "Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values > 0, excepting that at most one value may be 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold.",
        typeConverter=TypeConverters.toListFloat,
    )

    def __init__(self):
        super(HasThresholds, self).__init__()

    def getThresholds(self):
        """Return the value of ``thresholds``, or its default if unset."""
        return self.getOrDefault(self.thresholds)
0407
0408
class HasThreshold(Params):
    """Mixin that adds the ``threshold`` param: binary classification threshold in [0, 1] (default: 0.5)."""

    threshold = Param(
        Params._dummy(),
        "threshold",
        "threshold in binary classification prediction, in range [0, 1]",
        typeConverter=TypeConverters.toFloat,
    )

    def __init__(self):
        super(HasThreshold, self).__init__()
        self._setDefault(threshold=0.5)

    def getThreshold(self):
        """Return the value of ``threshold``, or its default if unset."""
        return self.getOrDefault(self.threshold)
0425
0426
class HasWeightCol(Params):
    """Mixin that adds the ``weightCol`` param: instance weight column (no default; unset means weight 1.0)."""

    weightCol = Param(
        Params._dummy(),
        "weightCol",
        "weight column name. If this is not set or empty, we treat all instance weights as 1.0.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasWeightCol, self).__init__()

    def getWeightCol(self):
        """Return the value of ``weightCol``, or its default if unset."""
        return self.getOrDefault(self.weightCol)
0442
0443
class HasSolver(Params):
    """Mixin that adds the ``solver`` param: optimization solver (default: 'auto')."""

    solver = Param(
        Params._dummy(),
        "solver",
        "the solver algorithm for optimization. If this is not set or empty, default value is 'auto'.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasSolver, self).__init__()
        self._setDefault(solver='auto')

    def getSolver(self):
        """Return the value of ``solver``, or its default if unset."""
        return self.getOrDefault(self.solver)
0460
0461
class HasVarianceCol(Params):
    """Mixin that adds the ``varianceCol`` param: biased sample variance column (no default)."""

    varianceCol = Param(
        Params._dummy(),
        "varianceCol",
        "column name for the biased sample variance of prediction.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasVarianceCol, self).__init__()

    def getVarianceCol(self):
        """Return the value of ``varianceCol``, or its default if unset."""
        return self.getOrDefault(self.varianceCol)
0477
0478
class HasAggregationDepth(Params):
    """Mixin that adds the ``aggregationDepth`` param: treeAggregate depth, >= 2 (default: 2)."""

    aggregationDepth = Param(
        Params._dummy(),
        "aggregationDepth",
        "suggested depth for treeAggregate (>= 2).",
        typeConverter=TypeConverters.toInt,
    )

    def __init__(self):
        super(HasAggregationDepth, self).__init__()
        self._setDefault(aggregationDepth=2)

    def getAggregationDepth(self):
        """Return the value of ``aggregationDepth``, or its default if unset."""
        return self.getOrDefault(self.aggregationDepth)
0495
0496
class HasParallelism(Params):
    """Mixin that adds the ``parallelism`` param: thread count for parallel algorithms, >= 1 (default: 1)."""

    parallelism = Param(
        Params._dummy(),
        "parallelism",
        "the number of threads to use when running parallel algorithms (>= 1).",
        typeConverter=TypeConverters.toInt,
    )

    def __init__(self):
        super(HasParallelism, self).__init__()
        self._setDefault(parallelism=1)

    def getParallelism(self):
        """Return the value of ``parallelism``, or its default if unset."""
        return self.getOrDefault(self.parallelism)
0513
0514
class HasCollectSubModels(Params):
    """Mixin that adds the ``collectSubModels`` param: keep all tuning sub-models (default: False)."""

    collectSubModels = Param(
        Params._dummy(),
        "collectSubModels",
        "Param for whether to collect a list of sub-models trained during tuning. If set to false, then only the single best sub-model will be available after fitting. If set to true, then all sub-models will be available. Warning: For large models, collecting all sub-models can cause OOMs on the Spark driver.",
        typeConverter=TypeConverters.toBoolean,
    )

    def __init__(self):
        super(HasCollectSubModels, self).__init__()
        self._setDefault(collectSubModels=False)

    def getCollectSubModels(self):
        """Return the value of ``collectSubModels``, or its default if unset."""
        return self.getOrDefault(self.collectSubModels)
0531
0532
class HasLoss(Params):
    """Mixin that adds the ``loss`` param: loss function to optimize (no default)."""

    loss = Param(
        Params._dummy(),
        "loss",
        "the loss function to be optimized.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasLoss, self).__init__()

    def getLoss(self):
        """Return the value of ``loss``, or its default if unset."""
        return self.getOrDefault(self.loss)
0548
0549
class HasDistanceMeasure(Params):
    """Mixin that adds the ``distanceMeasure`` param: 'euclidean' or 'cosine' (default: 'euclidean')."""

    distanceMeasure = Param(
        Params._dummy(),
        "distanceMeasure",
        "the distance measure. Supported options: 'euclidean' and 'cosine'.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasDistanceMeasure, self).__init__()
        self._setDefault(distanceMeasure='euclidean')

    def getDistanceMeasure(self):
        """Return the value of ``distanceMeasure``, or its default if unset."""
        return self.getOrDefault(self.distanceMeasure)
0566
0567
class HasValidationIndicatorCol(Params):
    """Mixin that adds the ``validationIndicatorCol`` param: boolean train/validation split column (no default)."""

    validationIndicatorCol = Param(
        Params._dummy(),
        "validationIndicatorCol",
        "name of the column that indicates whether each row is for training or for validation. False indicates training; true indicates validation.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(HasValidationIndicatorCol, self).__init__()

    def getValidationIndicatorCol(self):
        """Return the value of ``validationIndicatorCol``, or its default if unset."""
        return self.getOrDefault(self.validationIndicatorCol)
0583
0584
class HasBlockSize(Params):
    """Mixin that adds the ``blockSize`` param: matrix-stacking block size within partitions (no default)."""

    blockSize = Param(
        Params._dummy(),
        "blockSize",
        "block size for stacking input data in matrices. Data is stacked within partitions. If block size is more than remaining data in a partition then it is adjusted to the size of this data.",
        typeConverter=TypeConverters.toInt,
    )

    def __init__(self):
        super(HasBlockSize, self).__init__()

    def getBlockSize(self):
        """Return the value of ``blockSize``, or its default if unset."""
        return self.getOrDefault(self.blockSize)