|
||||
0001 /* 0002 * Licensed to the Apache Software Foundation (ASF) under one or more 0003 * contributor license agreements. See the NOTICE file distributed with 0004 * this work for additional information regarding copyright ownership. 0005 * The ASF licenses this file to You under the Apache License, Version 2.0 0006 * (the "License"); you may not use this file except in compliance with 0007 * the License. You may obtain a copy of the License at 0008 * 0009 * http://www.apache.org/licenses/LICENSE-2.0 0010 * 0011 * Unless required by applicable law or agreed to in writing, software 0012 * distributed under the License is distributed on an "AS IS" BASIS, 0013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 0014 * See the License for the specific language governing permissions and 0015 * limitations under the License. 0016 */ 0017 0018 package org.apache.spark.network.shuffle; 0019 0020 import java.io.File; 0021 import java.util.regex.Matcher; 0022 import java.util.regex.Pattern; 0023 0024 import com.google.common.annotations.VisibleForTesting; 0025 0026 import org.apache.spark.network.util.JavaUtils; 0027 0028 public class ExecutorDiskUtils { 0029 0030 private static final Pattern MULTIPLE_SEPARATORS = Pattern.compile(File.separator + "{2,}"); 0031 0032 /** 0033 * Hashes a filename into the corresponding local directory, in a manner consistent with 0034 * Spark's DiskBlockManager.getFile(). 0035 */ 0036 public static File getFile(String[] localDirs, int subDirsPerLocalDir, String filename) { 0037 int hash = JavaUtils.nonNegativeHash(filename); 0038 String localDir = localDirs[hash % localDirs.length]; 0039 int subDirId = (hash / localDirs.length) % subDirsPerLocalDir; 0040 return new File(createNormalizedInternedPathname( 0041 localDir, String.format("%02x", subDirId), filename)); 0042 } 0043 0044 /** 0045 * This method is needed to avoid the situation when multiple File instances for the 0046 * same pathname "foo/bar" are created, each with a separate copy of the "foo/bar" String. 0047 * According to measurements, in some scenarios such duplicate strings may waste a lot 0048 * of memory (~ 10% of the heap). To avoid that, we intern the pathname, and before that 0049 * we make sure that it's in a normalized form (contains no "//", "///" etc.) Otherwise, 0050 * the internal code in java.io.File would normalize it later, creating a new "foo/bar" 0051 * String copy. Unfortunately, we cannot just reuse the normalization code that java.io.File 0052 * uses, since it is in the package-private class java.io.FileSystem. 0053 */ 0054 @VisibleForTesting 0055 static String createNormalizedInternedPathname(String dir1, String dir2, String fname) { 0056 String pathname = dir1 + File.separator + dir2 + File.separator + fname; 0057 Matcher m = MULTIPLE_SEPARATORS.matcher(pathname); 0058 pathname = m.replaceAll("/"); 0059 // A single trailing slash needs to be taken care of separately 0060 if (pathname.length() > 1 && pathname.endsWith("/")) { 0061 pathname = pathname.substring(0, pathname.length() - 1); 0062 } 0063 return pathname.intern(); 0064 } 0065 0066 }
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.1.0 LXR engine. The LXR team |