Back to home page

OSCL-LXR

 
 

    


0001 -- Test aggregate operator with codegen on and off.
0002 --CONFIG_DIM1 spark.sql.codegen.wholeStage=true
0003 --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
0004 --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN
0005 
0006 -- Test data.
0007 CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
0008 (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
0009 AS testData(a, b);
0010 
0011 -- Aggregate with empty GroupBy expressions.
0012 SELECT a, COUNT(b) FROM testData;
0013 SELECT COUNT(a), COUNT(b) FROM testData;
0014 
0015 -- Aggregate with non-empty GroupBy expressions.
0016 SELECT a, COUNT(b) FROM testData GROUP BY a;
0017 SELECT a, COUNT(b) FROM testData GROUP BY b;
0018 SELECT COUNT(a), COUNT(b) FROM testData GROUP BY a;
0019 
0020 -- Aggregate grouped by literals.
0021 SELECT 'foo', COUNT(a) FROM testData GROUP BY 1;
0022 
0023 -- Aggregate grouped by literals (whole stage code generation).
0024 SELECT 'foo' FROM testData WHERE a = 0 GROUP BY 1;
0025 
0026 -- Aggregate grouped by literals (hash aggregate).
0027 SELECT 'foo', APPROX_COUNT_DISTINCT(a) FROM testData WHERE a = 0 GROUP BY 1;
0028 
0029 -- Aggregate grouped by literals (sort aggregate).
0030 SELECT 'foo', MAX(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1;
0031 
0032 -- Aggregate with complex GroupBy expressions.
0033 SELECT a + b, COUNT(b) FROM testData GROUP BY a + b;
0034 SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1;
0035 SELECT a + 1 + 1, COUNT(b) FROM testData GROUP BY a + 1;
0036 
0037 -- Aggregate with nulls.
0038 SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a)
0039 FROM testData;
0040 
0041 -- Aggregate with foldable input and multiple distinct groups.
0042 SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a;
0043 
0044 -- Aliases in SELECT could be used in GROUP BY
0045 SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
0046 SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1;
0047 
0048 -- Aggregate functions cannot be used in GROUP BY
0049 SELECT COUNT(b) AS k FROM testData GROUP BY k;
0050 
0051 -- Test data.
0052 CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
0053 (1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v);
0054 SELECT k AS a, COUNT(v) FROM testDataHasSameNameWithAlias GROUP BY a;
0055 
0056 -- turn off group by aliases
0057 set spark.sql.groupByAliases=false;
0058 
0059 -- Check analysis exceptions
0060 SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
0061 
0062 -- Aggregate with empty input and non-empty GroupBy expressions.
0063 SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a;
0064 
0065 -- Aggregate with empty input and empty GroupBy expressions.
0066 SELECT COUNT(1) FROM testData WHERE false;
0067 SELECT 1 FROM (SELECT COUNT(1) FROM testData WHERE false) t;
0068 
0069 -- Aggregate with empty GroupBy expressions and filter on top
0070 SELECT 1 from (
0071   SELECT 1 AS z,
0072   MIN(a.x)
0073   FROM (select 1 as x) a
0074   WHERE false
0075 ) b
0076 where b.z != b.z;
0077 
0078 -- SPARK-24369 multiple distinct aggregations having the same argument set
0079 SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*)
0080   FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y);
0081 
0082 -- SPARK-25708 HAVING without GROUP BY means global aggregate
0083 SELECT 1 FROM range(10) HAVING true;
0084 
0085 SELECT 1 FROM range(10) HAVING MAX(id) > 0;
0086 
0087 SELECT id FROM range(10) HAVING id > 0;
0088 
0089 -- Test data
0090 CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
0091   (1, true), (1, false),
0092   (2, true),
0093   (3, false), (3, null),
0094   (4, null), (4, null),
0095   (5, null), (5, true), (5, false) AS test_agg(k, v);
0096 
0097 -- empty table
0098 SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0;
0099 
0100 -- all null values
0101 SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4;
0102 
0103 -- aggregates are null Filtering
0104 SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5;
0105 
0106 -- group by
0107 SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k;
0108 
0109 -- having
0110 SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) = false;
0111 SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) IS NULL;
0112 
0113 -- basic subquery path to make sure rewrite happens in both parent and child plans.
0114 SELECT k,
0115        Every(v) AS every
0116 FROM   test_agg
0117 WHERE  k = 2
0118        AND v IN (SELECT Any(v)
0119                  FROM   test_agg
0120                  WHERE  k = 1)
0121 GROUP  BY k;
0122 
0123 -- basic subquery path to make sure rewrite happens in both parent and child plans.
0124 SELECT k,
0125        Every(v) AS every
0126 FROM   test_agg
0127 WHERE  k = 2
0128        AND v IN (SELECT Every(v)
0129                  FROM   test_agg
0130                  WHERE  k = 1)
0131 GROUP  BY k;
0132 
0133 -- input type checking Int
0134 SELECT every(1);
0135 
0136 -- input type checking Short
0137 SELECT some(1S);
0138 
0139 -- input type checking Long
0140 SELECT any(1L);
0141 
0142 -- input type checking String
0143 SELECT every("true");
0144 
0145 -- input type checking Decimal
0146 SELECT bool_and(1.0);
0147 
0148 -- input type checking double
0149 SELECT bool_or(1.0D);
0150 
0151 -- every/some/any aggregates/bool_and/bool_or are supported as windows expression.
0152 SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
0153 SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
0154 SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
0155 SELECT k, v, bool_and(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
0156 SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
0157 
0158 -- Having referencing aggregate expressions is ok.
0159 SELECT count(*) FROM test_agg HAVING count(*) > 1L;
0160 SELECT k, max(v) FROM test_agg GROUP BY k HAVING max(v) = true;
0161 
0162 -- Aggrgate expressions can be referenced through an alias
0163 SELECT * FROM (SELECT COUNT(*) AS cnt FROM test_agg) WHERE cnt > 1L;
0164 
0165 -- Error when aggregate expressions are in where clause directly
0166 SELECT count(*) FROM test_agg WHERE count(*) > 1L;
0167 SELECT count(*) FROM test_agg WHERE count(*) + 1L > 1L;
0168 SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1;