Back to home page

OSCL-LXR

 
 

    


0001 /**
0002  * Copyright 2015 Stijn de Gouw
0003  *
0004  * Licensed under the Apache License, Version 2.0 (the "License");
0005  * you may not use this file except in compliance with the License.
0006  * You may obtain a copy of the License at
0007  *
0008  *   http://www.apache.org/licenses/LICENSE-2.0
0009  *
0010  * Unless required by applicable law or agreed to in writing, software
0011  * distributed under the License is distributed on an "AS IS" BASIS,
0012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013  * See the License for the specific language governing permissions and
0014  * limitations under the License.
0015  *
0016  */
0017 package org.apache.spark.util.collection;
0018 
0019 import java.util.*;
0020 
0021 /**
0022  * This codes generates a int array which fails the standard TimSort.
0023  *
0024  * The blog that reported the bug
0025  * http://www.envisage-project.eu/timsort-specification-and-verification/
0026  *
0027  * This codes was originally wrote by Stijn de Gouw, modified by Evan Yu to adapt to
0028  * our test suite.
0029  *
0030  * https://github.com/abstools/java-timsort-bug
0031  * https://github.com/abstools/java-timsort-bug/blob/master/LICENSE
0032  */
0033 public class TestTimSort {
0034 
0035   private static final int MIN_MERGE = 32;
0036 
0037   /**
0038    * Returns an array of integers that demonstrate the bug in TimSort
0039    */
0040   public static int[] getTimSortBugTestSet(int length) {
0041     int minRun = minRunLength(length);
0042     List<Long> runs = runsJDKWorstCase(minRun, length);
0043     return createArray(runs, length);
0044   }
0045 
0046   private static int minRunLength(int n) {
0047     int r = 0; // Becomes 1 if any 1 bits are shifted off
0048     while (n >= MIN_MERGE) {
0049       r |= (n & 1);
0050       n >>= 1;
0051     }
0052     return n + r;
0053   }
0054 
0055   private static int[] createArray(List<Long> runs, int length) {
0056     int[] a = new int[length];
0057     Arrays.fill(a, 0);
0058     int endRun = -1;
0059     for (long len : runs) {
0060       a[endRun += len] = 1;
0061     }
0062     a[length - 1] = 0;
0063     return a;
0064   }
0065 
0066   /**
0067    * Fills <code>runs</code> with a sequence of run lengths of the form<br>
0068    * Y_n     x_{n,1}   x_{n,2}   ... x_{n,l_n} <br>
0069    * Y_{n-1} x_{n-1,1} x_{n-1,2} ... x_{n-1,l_{n-1}} <br>
0070    * ... <br>
0071    * Y_1     x_{1,1}   x_{1,2}   ... x_{1,l_1}<br>
0072    * The Y_i's are chosen to satisfy the invariant throughout execution,
0073    * but the x_{i,j}'s are merged (by <code>TimSort.mergeCollapse</code>)
0074    * into an X_i that violates the invariant.
0075    *
0076    * @param length The sum of all run lengths that will be added to <code>runs</code>.
0077    */
0078   private static List<Long> runsJDKWorstCase(int minRun, int length) {
0079     List<Long> runs = new ArrayList<>();
0080 
0081     long runningTotal = 0, Y = minRun + 4, X = minRun;
0082 
0083     while (runningTotal + Y + X <= length) {
0084       runningTotal += X + Y;
0085       generateJDKWrongElem(runs, minRun, X);
0086       runs.add(0, Y);
0087       // X_{i+1} = Y_i + x_{i,1} + 1, since runs.get(1) = x_{i,1}
0088       X = Y + runs.get(1) + 1;
0089       // Y_{i+1} = X_{i+1} + Y_i + 1
0090       Y += X + 1;
0091     }
0092 
0093     if (runningTotal + X <= length) {
0094       runningTotal += X;
0095       generateJDKWrongElem(runs, minRun, X);
0096     }
0097 
0098     runs.add(length - runningTotal);
0099     return runs;
0100   }
0101 
0102   /**
0103    * Adds a sequence x_1, ..., x_n of run lengths to <code>runs</code> such that:<br>
0104    * 1. X = x_1 + ... + x_n <br>
0105    * 2. x_j >= minRun for all j <br>
0106    * 3. x_1 + ... + x_{j-2}  <  x_j  <  x_1 + ... + x_{j-1} for all j <br>
0107    * These conditions guarantee that TimSort merges all x_j's one by one
0108    * (resulting in X) using only merges on the second-to-last element.
0109    *
0110    * @param X The sum of the sequence that should be added to runs.
0111    */
0112   private static void generateJDKWrongElem(List<Long> runs, int minRun, long X) {
0113     for (long newTotal; X >= 2 * minRun + 1; X = newTotal) {
0114       //Default strategy
0115       newTotal = X / 2 + 1;
0116       //Specialized strategies
0117       if (3 * minRun + 3 <= X && X <= 4 * minRun + 1) {
0118         // add x_1=MIN+1, x_2=MIN, x_3=X-newTotal  to runs
0119         newTotal = 2 * minRun + 1;
0120       } else if (5 * minRun + 5 <= X && X <= 6 * minRun + 5) {
0121         // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=X-newTotal  to runs
0122         newTotal = 3 * minRun + 3;
0123       } else if (8 * minRun + 9 <= X && X <= 10 * minRun + 9) {
0124         // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=X-newTotal  to runs
0125         newTotal = 5 * minRun + 5;
0126       } else if (13 * minRun + 15 <= X && X <= 16 * minRun + 17) {
0127         // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=3MIN+4, x_6=X-newTotal to runs
0128         newTotal = 8 * minRun + 9;
0129       }
0130       runs.add(0, X - newTotal);
0131     }
0132     runs.add(0, X);
0133   }
0134 }