Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 package org.apache.spark.examples.ml;
0019 
0020 import org.apache.spark.ml.feature.Interaction;
0021 import org.apache.spark.ml.feature.VectorAssembler;
0022 import org.apache.spark.sql.*;
0023 import org.apache.spark.sql.types.DataTypes;
0024 import org.apache.spark.sql.types.Metadata;
0025 import org.apache.spark.sql.types.StructField;
0026 import org.apache.spark.sql.types.StructType;
0027 
0028 import java.util.Arrays;
0029 import java.util.List;
0030 
0031 // $example on$
0032 // $example off$
0033 
0034 public class JavaInteractionExample {
0035   public static void main(String[] args) {
0036     SparkSession spark = SparkSession
0037       .builder()
0038       .appName("JavaInteractionExample")
0039       .getOrCreate();
0040 
0041     // $example on$
0042     List<Row> data = Arrays.asList(
0043       RowFactory.create(1, 1, 2, 3, 8, 4, 5),
0044       RowFactory.create(2, 4, 3, 8, 7, 9, 8),
0045       RowFactory.create(3, 6, 1, 9, 2, 3, 6),
0046       RowFactory.create(4, 10, 8, 6, 9, 4, 5),
0047       RowFactory.create(5, 9, 2, 7, 10, 7, 3),
0048       RowFactory.create(6, 1, 1, 4, 2, 8, 4)
0049     );
0050 
0051     StructType schema = new StructType(new StructField[]{
0052       new StructField("id1", DataTypes.IntegerType, false, Metadata.empty()),
0053       new StructField("id2", DataTypes.IntegerType, false, Metadata.empty()),
0054       new StructField("id3", DataTypes.IntegerType, false, Metadata.empty()),
0055       new StructField("id4", DataTypes.IntegerType, false, Metadata.empty()),
0056       new StructField("id5", DataTypes.IntegerType, false, Metadata.empty()),
0057       new StructField("id6", DataTypes.IntegerType, false, Metadata.empty()),
0058       new StructField("id7", DataTypes.IntegerType, false, Metadata.empty())
0059     });
0060 
0061     Dataset<Row> df = spark.createDataFrame(data, schema);
0062 
0063     VectorAssembler assembler1 = new VectorAssembler()
0064             .setInputCols(new String[]{"id2", "id3", "id4"})
0065             .setOutputCol("vec1");
0066 
0067     Dataset<Row> assembled1 = assembler1.transform(df);
0068 
0069     VectorAssembler assembler2 = new VectorAssembler()
0070             .setInputCols(new String[]{"id5", "id6", "id7"})
0071             .setOutputCol("vec2");
0072 
0073     Dataset<Row> assembled2 = assembler2.transform(assembled1).select("id1", "vec1", "vec2");
0074 
0075     Interaction interaction = new Interaction()
0076             .setInputCols(new String[]{"id1","vec1","vec2"})
0077             .setOutputCol("interactedCol");
0078 
0079     Dataset<Row> interacted = interaction.transform(assembled2);
0080 
0081     interacted.show(false);
0082     // $example off$
0083 
0084     spark.stop();
0085   }
0086 }
0087