Back to home page

OSCL-LXR

 
 

    


0001 #
0002 # Licensed to the Apache Software Foundation (ASF) under one or more
0003 # contributor license agreements.  See the NOTICE file distributed with
0004 # this work for additional information regarding copyright ownership.
0005 # The ASF licenses this file to You under the Apache License, Version 2.0
0006 # (the "License"); you may not use this file except in compliance with
0007 # the License.  You may obtain a copy of the License at
0008 #
0009 #    http://www.apache.org/licenses/LICENSE-2.0
0010 #
0011 # Unless required by applicable law or agreed to in writing, software
0012 # distributed under the License is distributed on an "AS IS" BASIS,
0013 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014 # See the License for the specific language governing permissions and
0015 # limitations under the License.
0016 #
0017 
0018 from __future__ import print_function
0019 
0020 # $example on$
0021 from pyspark.ml.feature import Interaction, VectorAssembler
0022 # $example off$
0023 from pyspark.sql import SparkSession
0024 
if __name__ == "__main__":
    # Build the session for this example through the builder's getOrCreate().
    spark = (
        SparkSession.builder
        .appName("InteractionExample")
        .getOrCreate()
    )

    # $example on$
    # Seven integer columns: "id1" is used on its own, the other six are
    # packed into two 3-element vectors below.
    column_names = ["id1", "id2", "id3", "id4", "id5", "id6", "id7"]
    rows = [
        (1, 1, 2, 3, 8, 4, 5),
        (2, 4, 3, 8, 7, 9, 8),
        (3, 6, 1, 9, 2, 3, 6),
        (4, 10, 8, 6, 9, 4, 5),
        (5, 9, 2, 7, 10, 7, 3),
        (6, 1, 1, 4, 2, 8, 4),
    ]
    source_df = spark.createDataFrame(rows, column_names)

    # Pack id2..id4 into the vector column "vec1".
    first_assembler = VectorAssembler(inputCols=["id2", "id3", "id4"], outputCol="vec1")
    with_vec1 = first_assembler.transform(source_df)

    # Pack id5..id7 into "vec2", then keep only the columns fed to Interaction.
    second_assembler = VectorAssembler(inputCols=["id5", "id6", "id7"], outputCol="vec2")
    with_both_vecs = second_assembler.transform(with_vec1)
    interaction_input = with_both_vecs.select("id1", "vec1", "vec2")

    # Combine "id1", "vec1" and "vec2" into the single column "interactedCol".
    interaction = Interaction(inputCols=["id1", "vec1", "vec2"], outputCol="interactedCol")
    interacted = interaction.transform(interaction_input)

    # truncate=False so the full vector column is printed.
    interacted.show(truncate=False)
    # $example off$

    spark.stop()