Back to home page

OSCL-LXR

 
 

    


0001 -- A test suite for IN JOINS in parent side, subquery, and both predicate subquery
0002 -- It includes correlated cases.
0003 
0004 -- There are 2 dimensions we want to test
0005 --  1. run with broadcast hash join, sort merge join or shuffle hash join.
0006 --  2. run with whole-stage-codegen, operator codegen or no codegen.
0007 
0008 --CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=10485760
0009 --CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
0010 --CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false
0011 
0012 --CONFIG_DIM2 spark.sql.codegen.wholeStage=true
0013 --CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
0014 --CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN
0015 
0016 create temporary view t1 as select * from values
0017   ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),
0018   ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
0019   ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'),
0020   ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
0021   ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'),
0022   ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null),
0023   ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null),
0024   ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'),
0025   ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'),
0026   ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
0027   ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'),
0028   ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04')
0029   as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i);
0030 
0031 create temporary view t2 as select * from values
0032   ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'),
0033   ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
0034   ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
0035   ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'),
0036   ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null),
0037   ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
0038   ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
0039   ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
0040   ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
0041   ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'),
0042   ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'),
0043   ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'),
0044   ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null)
0045   as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
0046 
0047 create temporary view t3 as select * from values
0048   ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'),
0049   ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
0050   ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
0051   ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
0052   ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'),
0053   ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'),
0054   ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'),
0055   ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'),
0056   ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null),
0057   ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null),
0058   ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
0059   ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04')
0060   as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i);
0061 
0062 create temporary view s1 as select * from values
0063     (1), (3), (5), (7), (9)
0064   as s1(id);
0065 
0066 create temporary view s2 as select * from values
0067     (1), (3), (4), (6), (9)
0068   as s2(id);
0069 
0070 create temporary view s3 as select * from values
0071     (3), (4), (6), (9)
0072   as s3(id);
0073 
0074 -- correlated IN subquery
0075 -- different JOIN in parent side
0076 -- TC 01.01
0077 SELECT t1a, t1b, t1c, t3a, t3b, t3c
0078 FROM   t1 natural JOIN t3
0079 WHERE  t1a IN (SELECT t2a
0080                FROM   t2
0081                WHERE t1a = t2a)
0082        AND t1b = t3b
0083        AND t1a = t3a
0084 ORDER  BY t1a,
0085           t1b,
0086           t1c DESC nulls first;
0087 
0088 -- TC 01.02
0089 SELECT    Count(DISTINCT(t1a)),
0090           t1b,
0091           t3a,
0092           t3b,
0093           t3c
0094 FROM      t1 natural left JOIN t3
0095 WHERE     t1a IN
0096           (
0097                  SELECT t2a
0098                  FROM   t2
0099                  WHERE t1d = t2d)
0100 AND       t1b > t3b
0101 GROUP BY  t1a,
0102           t1b,
0103           t3a,
0104           t3b,
0105           t3c
0106 ORDER BY  t1a DESC, t3b DESC, t3c ASC;
0107 
0108 -- TC 01.03
0109 SELECT     Count(DISTINCT(t1a))
0110 FROM       t1 natural right JOIN t3
0111 WHERE      t1a IN
0112            (
0113                   SELECT t2a
0114                   FROM   t2
0115                   WHERE  t1b = t2b)
0116 AND        t1d IN
0117            (
0118                   SELECT t2d
0119                   FROM   t2
0120                   WHERE  t1c > t2c)
0121 AND        t1a = t3a
0122 GROUP BY   t1a
0123 ORDER BY   t1a;
0124 
0125 -- TC 01.04
0126 SELECT          t1a,
0127                 t1b,
0128                 t1c,
0129                 t3a,
0130                 t3b,
0131                 t3c
0132 FROM            t1 FULL OUTER JOIN t3
0133 where           t1a IN
0134                 (
0135                        SELECT t2a
0136                        FROM   t2
0137                        WHERE t2c IS NOT NULL)
0138 AND             t1b != t3b
0139 AND             t1a = 'val1b'
0140 ORDER BY        t1a;
0141 
0142 -- TC 01.05
0143 SELECT     Count(DISTINCT(t1a)),
0144            t1b
0145 FROM       t1 RIGHT JOIN t3
0146 where      t1a IN
0147            (
0148                   SELECT t2a
0149                   FROM   t2
0150                   WHERE  t2h > t3h)
0151 AND        t3a IN
0152            (
0153                   SELECT t2a
0154                   FROM   t2
0155                   WHERE  t2c > t3c)
0156 AND        t1h >= t3h
0157 GROUP BY   t1a,
0158            t1b
0159 HAVING     t1b > 8
0160 ORDER BY   t1a;
0161 
0162 -- TC 01.06
0163 SELECT   Count(DISTINCT(t1a))
0164 FROM     t1 LEFT OUTER
0165 JOIN     t3
0166 ON t1a = t3a
0167 WHERE    t1a IN
0168          (
0169                 SELECT t2a
0170                 FROM   t2
0171                 WHERE  t1h < t2h )
0172 GROUP BY t1a
0173 ORDER BY t1a;
0174 
0175 -- TC 01.07
0176 SELECT   Count(DISTINCT(t1a)),
0177          t1b
0178 FROM     t1 INNER JOIN     t2
0179 ON       t1a > t2a
0180 WHERE    t1b IN
0181          (
0182                 SELECT t2b
0183                 FROM   t2
0184                 WHERE  t2h > t1h)
0185 OR       t1a IN
0186          (
0187                 SELECT t2a
0188                 FROM   t2
0189                 WHERE  t2h < t1h)
0190 GROUP BY t1b
0191 HAVING   t1b > 6;
0192 
0193 -- different JOIN in the subquery
0194 -- TC 01.08
0195 SELECT   Count(DISTINCT(t1a)),
0196          t1b
0197 FROM     t1
0198 WHERE    t1a IN
0199          (
0200                     SELECT     t2a
0201                     FROM       t2
0202                     JOIN t1
0203                     WHERE      t2b <> t1b)
0204 AND      t1h IN
0205          (
0206                     SELECT     t2h
0207                     FROM       t2
0208                     RIGHT JOIN t3
0209                     where      t2b = t3b)
0210 GROUP BY t1b
0211 HAVING t1b > 8;
0212 
0213 -- TC 01.09
0214 SELECT   Count(DISTINCT(t1a)),
0215          t1b
0216 FROM     t1
0217 WHERE    t1a IN
0218          (
0219                     SELECT     t2a
0220                     FROM       t2
0221                     JOIN t1
0222                     WHERE      t2b <> t1b)
0223 AND      t1h IN
0224          (
0225                     SELECT     t2h
0226                     FROM       t2
0227                     RIGHT JOIN t3
0228                     where      t2b = t3b)
0229 AND       t1b IN
0230          (
0231                     SELECT     t2b
0232                     FROM       t2
0233                     FULL OUTER JOIN t3
0234                     where      t2b = t3b)
0235 
0236 GROUP BY t1b
0237 HAVING   t1b > 8;
0238 
0239 -- JOIN in the parent and subquery
0240 -- TC 01.10
0241 SELECT     Count(DISTINCT(t1a)),
0242            t1b
0243 FROM       t1
0244 INNER JOIN t2 on t1b = t2b
0245 RIGHT JOIN t3 ON t1a = t3a
0246 where      t1a IN
0247            (
0248                            SELECT          t2a
0249                            FROM            t2
0250                            FULL OUTER JOIN t3
0251                            WHERE           t2b > t3b)
0252 AND        t1c IN
0253            (
0254                            SELECT          t3c
0255                            FROM            t3
0256                            LEFT OUTER JOIN t2
0257                            ON              t3a = t2a )
0258 AND        t1b IN
0259            (
0260                   SELECT t3b
0261                   FROM   t3 LEFT OUTER
0262                   JOIN   t1
0263                   WHERE  t3c = t1c)
0264 
0265 AND        t1a = t2a
0266 GROUP BY   t1b
0267 ORDER BY   t1b DESC;
0268 
0269 -- TC 01.11
0270 SELECT    t1a,
0271           t1b,
0272           t1c,
0273           count(distinct(t2a)),
0274           t2b,
0275           t2c
0276 FROM      t1
0277 FULL JOIN t2  on t1a = t2a
0278 RIGHT JOIN t3 on t1a = t3a
0279 where     t1a IN
0280           (
0281                  SELECT t2a
0282                  FROM   t2 INNER
0283                  JOIN   t3
0284                  ON     t2b < t3b
0285                  WHERE  t2c IN
0286                         (
0287                                SELECT t1c
0288                                FROM   t1
0289                                WHERE  t1a = t2a))
0290 and t1a = t2a
0291 Group By t1a, t1b, t1c, t2a, t2b, t2c
0292 HAVING t2c IS NOT NULL
0293 ORDER By t2b DESC nulls last;
0294 
0295 
0296 SELECT s1.id FROM s1
0297 JOIN s2 ON s1.id = s2.id
0298 AND s1.id IN (SELECT 9);
0299 
0300 
0301 SELECT s1.id FROM s1
0302 JOIN s2 ON s1.id = s2.id
0303 AND s1.id NOT IN (SELECT 9);
0304 
0305 
0306 -- IN with Subquery ON INNER JOIN
0307 SELECT s1.id FROM s1
0308 JOIN s2 ON s1.id = s2.id
0309 AND s1.id IN (SELECT id FROM s3);
0310 
0311 
0312 -- IN with Subquery ON LEFT SEMI JOIN
0313 SELECT s1.id AS id2 FROM s1
0314 LEFT SEMI JOIN s2
0315 ON s1.id = s2.id
0316 AND s1.id IN (SELECT id FROM s3);
0317 
0318 
0319 -- IN with Subquery ON LEFT ANTI JOIN
0320 SELECT s1.id as id2 FROM s1
0321 LEFT ANTI JOIN s2
0322 ON s1.id = s2.id
0323 AND s1.id IN (SELECT id FROM s3);
0324 
0325 
0326 -- IN with Subquery ON LEFT OUTER JOIN
0327 SELECT s1.id, s2.id as id2 FROM s1
0328 LEFT OUTER JOIN s2
0329 ON s1.id = s2.id
0330 AND s1.id IN (SELECT id FROM s3);
0331 
0332 
0333 -- IN with Subquery ON RIGHT OUTER JOIN
0334 SELECT s1.id, s2.id as id2 FROM s1
0335 RIGHT OUTER JOIN s2
0336 ON s1.id = s2.id
0337 AND s1.id IN (SELECT id FROM s3);
0338 
0339 
0340 -- IN with Subquery ON FULL OUTER JOIN
0341 SELECT s1.id, s2.id AS id2 FROM s1
0342 FULL OUTER JOIN s2
0343 ON s1.id = s2.id
0344 AND s1.id IN (SELECT id FROM s3);
0345 
0346 
0347 -- NOT IN with Subquery ON INNER JOIN
0348 SELECT s1.id FROM s1
0349 JOIN s2 ON s1.id = s2.id
0350 AND s1.id NOT IN (SELECT id FROM s3);
0351 
0352 
0353 -- NOT IN with Subquery ON LEFT SEMI JOIN
0354 SELECT s1.id AS id2 FROM s1
0355 LEFT SEMI JOIN s2
0356 ON s1.id = s2.id
0357 AND s1.id NOT IN (SELECT id FROM s3);
0358 
0359 
0360 -- NOT IN with Subquery ON LEFT ANTI JOIN
0361 SELECT s1.id AS id2 FROM s1
0362 LEFT ANTI JOIN s2
0363 ON s1.id = s2.id
0364 AND s1.id NOT IN (SELECT id FROM s3);
0365 
0366 
0367 -- NOT IN with Subquery ON LEFT OUTER JOIN
0368 SELECT s1.id, s2.id AS id2 FROM s1
0369 LEFT OUTER JOIN s2
0370 ON s1.id = s2.id
0371 AND s1.id NOT IN (SELECT id FROM s3);
0372 
0373 
0374 -- NOT IN with Subquery ON RIGHT OUTER JOIN
0375 SELECT s1.id, s2.id AS id2 FROM s1
0376 RIGHT OUTER JOIN s2
0377 ON s1.id = s2.id
0378 AND s1.id NOT IN (SELECT id FROM s3);
0379 
0380 
0381 -- NOT IN with Subquery ON FULL OUTER JOIN
0382 SELECT s1.id, s2.id AS id2 FROM s1
0383 FULL OUTER JOIN s2
0384 ON s1.id = s2.id
0385 AND s1.id NOT IN (SELECT id FROM s3);
0386 
0387 
0388 DROP VIEW s1;
0389 
0390 DROP VIEW s2;
0391 
0392 DROP VIEW s3;