0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.examples.ml;
0019
0020
0021 import java.util.Arrays;
0022 import java.util.List;
0023
0024 import org.apache.spark.ml.fpm.PrefixSpan;
0025 import org.apache.spark.sql.Dataset;
0026 import org.apache.spark.sql.Row;
0027 import org.apache.spark.sql.RowFactory;
0028 import org.apache.spark.sql.SparkSession;
0029 import org.apache.spark.sql.types.*;
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039 public class JavaPrefixSpanExample {
0040 public static void main(String[] args) {
0041 SparkSession spark = SparkSession
0042 .builder()
0043 .appName("JavaPrefixSpanExample")
0044 .getOrCreate();
0045
0046
0047 List<Row> data = Arrays.asList(
0048 RowFactory.create(Arrays.asList(Arrays.asList(1, 2), Arrays.asList(3))),
0049 RowFactory.create(Arrays.asList(Arrays.asList(1), Arrays.asList(3, 2), Arrays.asList(1,2))),
0050 RowFactory.create(Arrays.asList(Arrays.asList(1, 2), Arrays.asList(5))),
0051 RowFactory.create(Arrays.asList(Arrays.asList(6)))
0052 );
0053 StructType schema = new StructType(new StructField[]{ new StructField(
0054 "sequence", new ArrayType(new ArrayType(DataTypes.IntegerType, true), true),
0055 false, Metadata.empty())
0056 });
0057 Dataset<Row> sequenceDF = spark.createDataFrame(data, schema);
0058
0059 PrefixSpan prefixSpan = new PrefixSpan().setMinSupport(0.5).setMaxPatternLength(5);
0060
0061
0062 prefixSpan.findFrequentSequentialPatterns(sequenceDF).show();
0063
0064
0065 spark.stop();
0066 }
0067 }