0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 from __future__ import print_function
0019
0020
0021 from pyspark.ml.feature import Interaction, VectorAssembler
0022
0023 from pyspark.sql import SparkSession
0024
if __name__ == "__main__":
    # Example script: assemble two feature vectors from integer columns and
    # feed them, together with a scalar column, through an Interaction
    # transformer, then print the resulting DataFrame.
    spark = (
        SparkSession.builder
        .appName("InteractionExample")
        .getOrCreate()
    )

    # Everything after session creation runs under try/finally so that the
    # session is stopped even when one of the pipeline steps raises.
    try:
        # Six rows, seven integer columns: "id1" plus two groups of three
        # columns that are packed into vectors below.
        df = spark.createDataFrame(
            [
                (1, 1, 2, 3, 8, 4, 5),
                (2, 4, 3, 8, 7, 9, 8),
                (3, 6, 1, 9, 2, 3, 6),
                (4, 10, 8, 6, 9, 4, 5),
                (5, 9, 2, 7, 10, 7, 3),
                (6, 1, 1, 4, 2, 8, 4),
            ],
            ["id1", "id2", "id3", "id4", "id5", "id6", "id7"],
        )

        # Pack id2..id4 into the vector column "vec1".
        assembler1 = VectorAssembler(inputCols=["id2", "id3", "id4"], outputCol="vec1")
        assembled1 = assembler1.transform(df)

        # Pack id5..id7 into "vec2" and keep only the columns that the
        # Interaction stage reads.
        assembler2 = VectorAssembler(inputCols=["id5", "id6", "id7"], outputCol="vec2")
        assembled2 = assembler2.transform(assembled1).select("id1", "vec1", "vec2")

        # Interact id1, vec1 and vec2; the result is written to "interactedCol".
        interaction = Interaction(inputCols=["id1", "vec1", "vec2"], outputCol="interactedCol")
        interacted = interaction.transform(assembled2)

        # truncate=False prints full cell contents instead of shortening them.
        interacted.show(truncate=False)
    finally:
        spark.stop()