Back to home page

OSCL-LXR

 
 

    


0001 -- Unit tests for simple NOT IN predicate subquery across a single column.
0002 --
0003 -- ``col NOT IN expr'' is quite difficult to reason about. There are many edge cases, some of the
0004 -- rules are confusing to the uninitiated, and precedence and treatment of null values is plain
0005 -- unintuitive. To make this simpler to understand, I've come up with a plain English way of
0006 -- describing the expected behavior of this query.
0007 --
0008 -- - If the subquery is empty (i.e. returns no rows), the row should be returned, regardless of
0009 --   whether the filtered columns include nulls.
0010 -- - If the subquery contains a result with all columns null, then the row should not be returned.
0011 -- - If for all non-null filter columns there exists a row in the subquery in which each column
0012 --   either
0013 --   1. is equal to the corresponding filter column or
0014 --   2. is null
0015 --   then the row should not be returned. (This includes the case where all filter columns are
0016 --   null.)
0017 -- - Otherwise, the row should be returned.
0018 --
0019 -- Using these rules, we can come up with a set of test cases for single-column and multi-column
0020 -- NOT IN test cases.
0021 --
0022 -- Test cases for single-column ``WHERE a NOT IN (SELECT c FROM r ...)'':
0023 -- | # | does subquery include null? | is a null? | a = c? | row with a included in result? |
0024 -- | 1 | empty                       |            |        | yes                            |
0025 -- | 2 | yes                         |            |        | no                             |
0026 -- | 3 | no                          | yes        |        | no                             |
0027 -- | 4 | no                          | no         | yes    | no                             |
0028 -- | 5 | no                          | no         | no     | yes                            |
0029 --
0030 -- There are also some considerations around correlated subqueries. Correlated subqueries can
0031 -- cause cases 2, 3, or 4 to be reduced to case 1 by limiting the number of rows returned by the
0032 -- subquery, so the row from the parent table should always be included in the output.
0033 
0034 CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES
0035   (null, 1.0),
0036   (2, 3.0),
0037   (4, 5.0)
0038   AS m(a, b);
0039 
0040 CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES
0041   (null, 1.0),
0042   (2, 3.0),
0043   (6, 7.0)
0044   AS s(c, d);
0045 
0046   -- Uncorrelated NOT IN Subquery test cases
0047   -- Case 1
0048   -- (empty subquery -> all rows returned)
0049 SELECT *
0050 FROM   m
0051 WHERE  a NOT IN (SELECT c
0052                  FROM   s
0053                  WHERE  d > 10.0) -- (empty subquery)
0054 ;
0055 
0056   -- Case 2
0057   -- (subquery includes null -> no rows returned)
0058 SELECT *
0059 FROM   m
0060 WHERE  a NOT IN (SELECT c
0061                  FROM   s
0062                  WHERE  d = 1.0) -- Only matches (null, 1.0)
0063 ;
0064 
0065   -- Case 3
0066   -- (probe column is null -> row not returned)
0067 SELECT *
0068 FROM   m
0069 WHERE  b = 1.0 -- Only matches (null, 1.0)
0070        AND a NOT IN (SELECT c
0071                      FROM   s
0072                      WHERE  d = 3.0) -- Matches (2, 3.0)
0073 ;
0074 
0075   -- Case 4
0076   -- (probe column matches subquery row -> row not returned)
0077 SELECT *
0078 FROM   m
0079 WHERE  b = 3.0 -- Only matches (2, 3.0)
0080        AND a NOT IN (SELECT c
0081                      FROM   s
0082                      WHERE  d = 3.0) -- Matches (2, 3.0)
0083 ;
0084 
0085   -- Case 5
0086   -- (probe column does not match subquery row -> row is returned)
0087 SELECT *
0088 FROM   m
0089 WHERE  b = 3.0 -- Only matches (2, 3.0)
0090        AND a NOT IN (SELECT c
0091                      FROM   s
0092                      WHERE  d = 7.0) -- Matches (6, 7.0)
0093 ;
0094 
0095   -- Correlated NOT IN subquery test cases
0096   -- Case 2->1
0097   -- (subquery had nulls but they are removed by correlated subquery -> all rows returned)
0098 SELECT *
0099 FROM   m
0100 WHERE a NOT IN (SELECT c
0101                 FROM   s
0102                 WHERE  d = b + 10) -- Matches no row
0103 ;
0104 
0105   -- Case 3->1
0106   -- (probe column is null but subquery returns no rows -> row is returned)
0107 SELECT *
0108 FROM   m
0109 WHERE  b = 1.0 -- Only matches (null, 1.0)
0110        AND a NOT IN (SELECT c
0111                      FROM   s
0112                      WHERE  d = b + 10) -- Matches no row
0113 ;
0114 
0115   -- Case 4->1
0116   -- (probe column matches row which is filtered out by correlated subquery -> row is returned)
0117 SELECT *
0118 FROM   m
0119 WHERE  b = 3.0 -- Only matches (2, 3.0)
0120        AND a NOT IN (SELECT c
0121                      FROM   s
0122                      WHERE  d = b + 10) -- Matches no row
0123 ;