udf/postgreSQL/udf-select_having.sql

0001 --
0002 -- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
0003 --
0004 --
0005 -- SELECT_HAVING
0006 -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_having.sql
0007 --
0008 -- This test file was converted from inputs/postgreSQL/select_having.sql
0009 -- TODO: Add UDFs to the GROUP BY clauses once [SPARK-28386] and [SPARK-26741] are resolved.
0010
0011 -- load test data: create parquet table test_having and insert ten rows (a = 0..9 unique, b = 1..4; c and d are strings of mixed letter case)
0012 CREATE TABLE test_having (a int, b int, c string, d string) USING parquet;
0013 INSERT INTO test_having VALUES (0, 1, 'XXXX', 'A');
0014 INSERT INTO test_having VALUES (1, 2, 'AAAA', 'b');
0015 INSERT INTO test_having VALUES (2, 2, 'AAAA', 'c');
0016 INSERT INTO test_having VALUES (3, 3, 'BBBB', 'D');
0017 INSERT INTO test_having VALUES (4, 3, 'BBBB', 'e');
0018 INSERT INTO test_having VALUES (5, 3, 'bbbb', 'F');
0019 INSERT INTO test_having VALUES (6, 4, 'cccc', 'g');
0020 INSERT INTO test_having VALUES (7, 4, 'cccc', 'h');
0021 INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I');
0022 INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');
0023
-- Return udf(b), udf(c) for each (b, c) group, keeping only groups whose
-- udf-wrapped row count equals 1; output is ordered by udf(b), then udf(c).
0024 SELECT udf(b), udf(c) FROM test_having
0025         GROUP BY b, c HAVING udf(count(*)) = 1 ORDER BY udf(b), udf(c);
0026
0027 -- HAVING is effectively equivalent to WHERE in this case: the predicate tests only the grouping column b (via udf) and uses no aggregate
0028 SELECT udf(b), udf(c) FROM test_having
0029         GROUP BY b, c HAVING udf(b) = 3 ORDER BY udf(b), udf(c);
0030
0031 -- Commented out because of [SPARK-28386]: cannot resolve ORDER BY columns with GROUP BY and HAVING
0032 -- SELECT lower(c), count(c) FROM test_having
0033 --      GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
0034 --      ORDER BY lower(c);
0035
-- For each group of c, return udf(c) and max(udf(a)), keeping groups whose
-- udf(count(*)) exceeds 2 or whose udf(min(a)) equals udf(max(a)).
-- Unlike the queries above, ORDER BY uses the bare column c rather than udf(c).
0036 SELECT udf(c), max(udf(a)) FROM test_having
0037         GROUP BY c HAVING udf(count(*)) > 2 OR udf(min(a)) = udf(max(a))
0038         ORDER BY c;
0039
0040 -- test degenerate cases involving HAVING without GROUP BY
0041 -- Per SQL spec, these should generate 0 or 1 row, even without aggregates
0042
-- HAVING requires the udf-wrapped minimum and maximum of a to be equal.
0043 SELECT udf(udf(min(udf(a)))), udf(udf(max(udf(a)))) FROM test_having HAVING udf(udf(min(udf(a)))) = udf(udf(max(udf(a))));
-- HAVING requires the udf-wrapped minimum of a to be less than the maximum.
0044 SELECT udf(min(udf(a))), udf(udf(max(a))) FROM test_having HAVING udf(min(a)) < udf(max(udf(a)));
0045
0046 -- errors: ungrouped column references (no GROUP BY, yet column a is used outside any aggregate: in the select list of the first query, in HAVING of the second)
0047 SELECT udf(a) FROM test_having HAVING udf(min(a)) < udf(max(a));
0048 SELECT 1 AS one FROM test_having HAVING udf(a) > 1;
0049
0050 -- the really degenerate case: need not scan table at all (HAVING is udf applied to a comparison of constants only: udf(1) > udf(2), then udf(1) < udf(2))
0051 SELECT 1 AS one FROM test_having HAVING udf(udf(1) > udf(2));
0052 SELECT 1 AS one FROM test_having HAVING udf(udf(1) < udf(2));
0053
0054 -- and just to prove that we aren't scanning the table: the WHERE predicate divides by udf(a), and test_having contains a row with a = 0
0055 SELECT 1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2;
0056
-- Clean up: remove the test_having table created at the top of this script.
0057 DROP TABLE test_having;