Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 package org.apache.spark.network.shuffle;
0019 
0020 import java.io.File;
0021 import java.util.regex.Matcher;
0022 import java.util.regex.Pattern;
0023 
0024 import com.google.common.annotations.VisibleForTesting;
0025 
0026 import org.apache.spark.network.util.JavaUtils;
0027 
0028 public class ExecutorDiskUtils {
0029 
0030   private static final Pattern MULTIPLE_SEPARATORS = Pattern.compile(File.separator + "{2,}");
0031 
0032   /**
0033    * Hashes a filename into the corresponding local directory, in a manner consistent with
0034    * Spark's DiskBlockManager.getFile().
0035    */
0036   public static File getFile(String[] localDirs, int subDirsPerLocalDir, String filename) {
0037     int hash = JavaUtils.nonNegativeHash(filename);
0038     String localDir = localDirs[hash % localDirs.length];
0039     int subDirId = (hash / localDirs.length) % subDirsPerLocalDir;
0040     return new File(createNormalizedInternedPathname(
0041         localDir, String.format("%02x", subDirId), filename));
0042   }
0043 
0044   /**
0045    * This method is needed to avoid the situation when multiple File instances for the
0046    * same pathname "foo/bar" are created, each with a separate copy of the "foo/bar" String.
0047    * According to measurements, in some scenarios such duplicate strings may waste a lot
0048    * of memory (~ 10% of the heap). To avoid that, we intern the pathname, and before that
0049    * we make sure that it's in a normalized form (contains no "//", "///" etc.) Otherwise,
0050    * the internal code in java.io.File would normalize it later, creating a new "foo/bar"
0051    * String copy. Unfortunately, we cannot just reuse the normalization code that java.io.File
0052    * uses, since it is in the package-private class java.io.FileSystem.
0053    */
0054   @VisibleForTesting
0055   static String createNormalizedInternedPathname(String dir1, String dir2, String fname) {
0056     String pathname = dir1 + File.separator + dir2 + File.separator + fname;
0057     Matcher m = MULTIPLE_SEPARATORS.matcher(pathname);
0058     pathname = m.replaceAll("/");
0059     // A single trailing slash needs to be taken care of separately
0060     if (pathname.length() > 1 && pathname.endsWith("/")) {
0061       pathname = pathname.substring(0, pathname.length() - 1);
0062     }
0063     return pathname.intern();
0064   }
0065 
0066 }