Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 package org.apache.spark.examples;
0019 
0020 import scala.Tuple2;
0021 
0022 import org.apache.spark.api.java.JavaPairRDD;
0023 import org.apache.spark.api.java.JavaRDD;
0024 import org.apache.spark.sql.SparkSession;
0025 
0026 import java.util.Arrays;
0027 import java.util.List;
0028 import java.util.regex.Pattern;
0029 
0030 public final class JavaWordCount {
0031   private static final Pattern SPACE = Pattern.compile(" ");
0032 
0033   public static void main(String[] args) throws Exception {
0034 
0035     if (args.length < 1) {
0036       System.err.println("Usage: JavaWordCount <file>");
0037       System.exit(1);
0038     }
0039 
0040     SparkSession spark = SparkSession
0041       .builder()
0042       .appName("JavaWordCount")
0043       .getOrCreate();
0044 
0045     JavaRDD<String> lines = spark.read().textFile(args[0]).javaRDD();
0046 
0047     JavaRDD<String> words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator());
0048 
0049     JavaPairRDD<String, Integer> ones = words.mapToPair(s -> new Tuple2<>(s, 1));
0050 
0051     JavaPairRDD<String, Integer> counts = ones.reduceByKey((i1, i2) -> i1 + i2);
0052 
0053     List<Tuple2<String, Integer>> output = counts.collect();
0054     for (Tuple2<?,?> tuple : output) {
0055       System.out.println(tuple._1() + ": " + tuple._2());
0056     }
0057     spark.stop();
0058   }
0059 }