0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.examples.ml;
0019
0020 import org.apache.spark.ml.feature.Interaction;
0021 import org.apache.spark.ml.feature.VectorAssembler;
0022 import org.apache.spark.sql.*;
0023 import org.apache.spark.sql.types.DataTypes;
0024 import org.apache.spark.sql.types.Metadata;
0025 import org.apache.spark.sql.types.StructField;
0026 import org.apache.spark.sql.types.StructType;
0027
0028 import java.util.Arrays;
0029 import java.util.List;
0030
0031
0032
0033
0034 public class JavaInteractionExample {
0035 public static void main(String[] args) {
0036 SparkSession spark = SparkSession
0037 .builder()
0038 .appName("JavaInteractionExample")
0039 .getOrCreate();
0040
0041
0042 List<Row> data = Arrays.asList(
0043 RowFactory.create(1, 1, 2, 3, 8, 4, 5),
0044 RowFactory.create(2, 4, 3, 8, 7, 9, 8),
0045 RowFactory.create(3, 6, 1, 9, 2, 3, 6),
0046 RowFactory.create(4, 10, 8, 6, 9, 4, 5),
0047 RowFactory.create(5, 9, 2, 7, 10, 7, 3),
0048 RowFactory.create(6, 1, 1, 4, 2, 8, 4)
0049 );
0050
0051 StructType schema = new StructType(new StructField[]{
0052 new StructField("id1", DataTypes.IntegerType, false, Metadata.empty()),
0053 new StructField("id2", DataTypes.IntegerType, false, Metadata.empty()),
0054 new StructField("id3", DataTypes.IntegerType, false, Metadata.empty()),
0055 new StructField("id4", DataTypes.IntegerType, false, Metadata.empty()),
0056 new StructField("id5", DataTypes.IntegerType, false, Metadata.empty()),
0057 new StructField("id6", DataTypes.IntegerType, false, Metadata.empty()),
0058 new StructField("id7", DataTypes.IntegerType, false, Metadata.empty())
0059 });
0060
0061 Dataset<Row> df = spark.createDataFrame(data, schema);
0062
0063 VectorAssembler assembler1 = new VectorAssembler()
0064 .setInputCols(new String[]{"id2", "id3", "id4"})
0065 .setOutputCol("vec1");
0066
0067 Dataset<Row> assembled1 = assembler1.transform(df);
0068
0069 VectorAssembler assembler2 = new VectorAssembler()
0070 .setInputCols(new String[]{"id5", "id6", "id7"})
0071 .setOutputCol("vec2");
0072
0073 Dataset<Row> assembled2 = assembler2.transform(assembled1).select("id1", "vec1", "vec2");
0074
0075 Interaction interaction = new Interaction()
0076 .setInputCols(new String[]{"id1","vec1","vec2"})
0077 .setOutputCol("interactedCol");
0078
0079 Dataset<Row> interacted = interaction.transform(assembled2);
0080
0081 interacted.show(false);
0082
0083
0084 spark.stop();
0085 }
0086 }
0087