|
||||
0001 /** 0002 * Copyright 2015 Stijn de Gouw 0003 * 0004 * Licensed under the Apache License, Version 2.0 (the "License"); 0005 * you may not use this file except in compliance with the License. 0006 * You may obtain a copy of the License at 0007 * 0008 * http://www.apache.org/licenses/LICENSE-2.0 0009 * 0010 * Unless required by applicable law or agreed to in writing, software 0011 * distributed under the License is distributed on an "AS IS" BASIS, 0012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 0013 * See the License for the specific language governing permissions and 0014 * limitations under the License. 0015 * 0016 */ 0017 package org.apache.spark.util.collection; 0018 0019 import java.util.*; 0020 0021 /** 0022 * This codes generates a int array which fails the standard TimSort. 0023 * 0024 * The blog that reported the bug 0025 * http://www.envisage-project.eu/timsort-specification-and-verification/ 0026 * 0027 * This codes was originally wrote by Stijn de Gouw, modified by Evan Yu to adapt to 0028 * our test suite. 0029 * 0030 * https://github.com/abstools/java-timsort-bug 0031 * https://github.com/abstools/java-timsort-bug/blob/master/LICENSE 0032 */ 0033 public class TestTimSort { 0034 0035 private static final int MIN_MERGE = 32; 0036 0037 /** 0038 * Returns an array of integers that demonstrate the bug in TimSort 0039 */ 0040 public static int[] getTimSortBugTestSet(int length) { 0041 int minRun = minRunLength(length); 0042 List<Long> runs = runsJDKWorstCase(minRun, length); 0043 return createArray(runs, length); 0044 } 0045 0046 private static int minRunLength(int n) { 0047 int r = 0; // Becomes 1 if any 1 bits are shifted off 0048 while (n >= MIN_MERGE) { 0049 r |= (n & 1); 0050 n >>= 1; 0051 } 0052 return n + r; 0053 } 0054 0055 private static int[] createArray(List<Long> runs, int length) { 0056 int[] a = new int[length]; 0057 Arrays.fill(a, 0); 0058 int endRun = -1; 0059 for (long len : runs) { 0060 a[endRun += len] = 1; 0061 } 0062 a[length - 1] = 0; 0063 return a; 0064 } 0065 0066 /** 0067 * Fills <code>runs</code> with a sequence of run lengths of the form<br> 0068 * Y_n x_{n,1} x_{n,2} ... x_{n,l_n} <br> 0069 * Y_{n-1} x_{n-1,1} x_{n-1,2} ... x_{n-1,l_{n-1}} <br> 0070 * ... <br> 0071 * Y_1 x_{1,1} x_{1,2} ... x_{1,l_1}<br> 0072 * The Y_i's are chosen to satisfy the invariant throughout execution, 0073 * but the x_{i,j}'s are merged (by <code>TimSort.mergeCollapse</code>) 0074 * into an X_i that violates the invariant. 0075 * 0076 * @param length The sum of all run lengths that will be added to <code>runs</code>. 0077 */ 0078 private static List<Long> runsJDKWorstCase(int minRun, int length) { 0079 List<Long> runs = new ArrayList<>(); 0080 0081 long runningTotal = 0, Y = minRun + 4, X = minRun; 0082 0083 while (runningTotal + Y + X <= length) { 0084 runningTotal += X + Y; 0085 generateJDKWrongElem(runs, minRun, X); 0086 runs.add(0, Y); 0087 // X_{i+1} = Y_i + x_{i,1} + 1, since runs.get(1) = x_{i,1} 0088 X = Y + runs.get(1) + 1; 0089 // Y_{i+1} = X_{i+1} + Y_i + 1 0090 Y += X + 1; 0091 } 0092 0093 if (runningTotal + X <= length) { 0094 runningTotal += X; 0095 generateJDKWrongElem(runs, minRun, X); 0096 } 0097 0098 runs.add(length - runningTotal); 0099 return runs; 0100 } 0101 0102 /** 0103 * Adds a sequence x_1, ..., x_n of run lengths to <code>runs</code> such that:<br> 0104 * 1. X = x_1 + ... + x_n <br> 0105 * 2. x_j >= minRun for all j <br> 0106 * 3. x_1 + ... + x_{j-2} < x_j < x_1 + ... + x_{j-1} for all j <br> 0107 * These conditions guarantee that TimSort merges all x_j's one by one 0108 * (resulting in X) using only merges on the second-to-last element. 0109 * 0110 * @param X The sum of the sequence that should be added to runs. 0111 */ 0112 private static void generateJDKWrongElem(List<Long> runs, int minRun, long X) { 0113 for (long newTotal; X >= 2 * minRun + 1; X = newTotal) { 0114 //Default strategy 0115 newTotal = X / 2 + 1; 0116 //Specialized strategies 0117 if (3 * minRun + 3 <= X && X <= 4 * minRun + 1) { 0118 // add x_1=MIN+1, x_2=MIN, x_3=X-newTotal to runs 0119 newTotal = 2 * minRun + 1; 0120 } else if (5 * minRun + 5 <= X && X <= 6 * minRun + 5) { 0121 // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=X-newTotal to runs 0122 newTotal = 3 * minRun + 3; 0123 } else if (8 * minRun + 9 <= X && X <= 10 * minRun + 9) { 0124 // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=X-newTotal to runs 0125 newTotal = 5 * minRun + 5; 0126 } else if (13 * minRun + 15 <= X && X <= 16 * minRun + 17) { 0127 // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=3MIN+4, x_6=X-newTotal to runs 0128 newTotal = 8 * minRun + 9; 0129 } 0130 runs.add(0, X - newTotal); 0131 } 0132 runs.add(0, X); 0133 } 0134 }
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.1.0 LXR engine. The LXR team |