mllib/feature/JavaWord2VecSuite.java

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017
0018 package org.apache.spark.mllib.feature;
0019
0020 import java.util.Arrays;
0021 import java.util.List;
0022
0023 import com.google.common.base.Strings;
0024
0025 import scala.Tuple2;
0026
0027 import org.junit.Assert;
0028 import org.junit.Test;
0029
0030 import org.apache.spark.SharedSparkSession;
0031 import org.apache.spark.api.java.JavaRDD;
0032
0033 public class JavaWord2VecSuite extends SharedSparkSession {
0034
0035   @Test
0036   @SuppressWarnings("unchecked")
0037   public void word2Vec() {
0038     // The tests are to check Java compatibility.
0039     String sentence = Strings.repeat("a b ", 100) + Strings.repeat("a c ", 10);
0040     List<String> words = Arrays.asList(sentence.split(" "));
0041     List<List<String>> localDoc = Arrays.asList(words, words);
0042     JavaRDD<List<String>> doc = jsc.parallelize(localDoc);
0043     Word2Vec word2vec = new Word2Vec()
0044       .setVectorSize(10)
0045       .setSeed(42L);
0046     Word2VecModel model = word2vec.fit(doc);
0047     Tuple2<String, Object>[] syms = model.findSynonyms("a", 2);
0048     Assert.assertEquals(2, syms.length);
0049     Assert.assertEquals("b", syms[0]._1());
0050     Assert.assertEquals("c", syms[1]._1());
0051   }
0052 }