0001
0002
0003 CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
0004 (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
0005 AS testData(a, b);
0006
0007
0008 SELECT udf(a), udf(COUNT(b)) FROM testData;
0009 SELECT COUNT(udf(a)), udf(COUNT(b)) FROM testData;
0010
0011
0012 SELECT udf(a), COUNT(udf(b)) FROM testData GROUP BY a;
0013 SELECT udf(a), udf(COUNT(udf(b))) FROM testData GROUP BY b;
0014 SELECT COUNT(udf(a)), COUNT(udf(b)) FROM testData GROUP BY udf(a);
0015
0016
0017 SELECT 'foo', COUNT(udf(a)) FROM testData GROUP BY 1;
0018
0019
0020 SELECT 'foo' FROM testData WHERE a = 0 GROUP BY udf(1);
0021
0022
0023 SELECT 'foo', udf(APPROX_COUNT_DISTINCT(udf(a))) FROM testData WHERE a = 0 GROUP BY udf(1);
0024
0025
0026 SELECT 'foo', MAX(STRUCT(udf(a))) FROM testData WHERE a = 0 GROUP BY udf(1);
0027
0028
0029 SELECT udf(a + b), udf(COUNT(b)) FROM testData GROUP BY a + b;
0030 SELECT udf(a + 2), udf(COUNT(b)) FROM testData GROUP BY a + 1;
0031 SELECT udf(a + 1) + 1, udf(COUNT(b)) FROM testData GROUP BY udf(a + 1);
0032
0033
0034 SELECT SKEWNESS(udf(a)), udf(KURTOSIS(a)), udf(MIN(a)), MAX(udf(a)), udf(AVG(udf(a))), udf(VARIANCE(a)), STDDEV(udf(a)), udf(SUM(a)), udf(COUNT(a))
0035 FROM testData;
0036
0037
0038 SELECT COUNT(DISTINCT udf(b)), udf(COUNT(DISTINCT b, c)) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY udf(a);
0039
0040
0041 SELECT udf(a) AS k, COUNT(udf(b)) FROM testData GROUP BY k;
0042 SELECT a AS k, udf(COUNT(b)) FROM testData GROUP BY k HAVING k > 1;
0043
0044
0045 SELECT udf(COUNT(b)) AS k FROM testData GROUP BY k;
0046
0047
0048 CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
0049 (1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v);
0050 SELECT k AS a, udf(COUNT(udf(v))) FROM testDataHasSameNameWithAlias GROUP BY udf(a);
0051
0052
0053 set spark.sql.groupByAliases=false;
0054
0055
0056 SELECT a AS k, udf(COUNT(udf(b))) FROM testData GROUP BY k;
0057
0058
0059 SELECT udf(a), COUNT(udf(1)) FROM testData WHERE false GROUP BY udf(a);
0060
0061
0062 SELECT udf(COUNT(1)) FROM testData WHERE false;
0063 SELECT 1 FROM (SELECT udf(COUNT(1)) FROM testData WHERE false) t;
0064
0065
0066 SELECT 1 from (
0067 SELECT 1 AS z,
0068 udf(MIN(a.x))
0069 FROM (select 1 as x) a
0070 WHERE false
0071 ) b
0072 where b.z != b.z;
0073
0074
0075 SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*)
0076 FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y);
0077
0078
0079 SELECT udf(1) FROM range(10) HAVING true;
0080
0081 SELECT udf(udf(1)) FROM range(10) HAVING MAX(id) > 0;
0082
0083 SELECT udf(id) FROM range(10) HAVING id > 0;
0084
0085
0086 CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
0087 (1, true), (1, false),
0088 (2, true),
0089 (3, false), (3, null),
0090 (4, null), (4, null),
0091 (5, null), (5, true), (5, false) AS test_agg(k, v);
0092
0093
0094 SELECT udf(every(v)), udf(some(v)), any(v) FROM test_agg WHERE 1 = 0;
0095
0096
0097 SELECT udf(every(udf(v))), some(v), any(v) FROM test_agg WHERE k = 4;
0098
0099
0100 SELECT every(v), udf(some(v)), any(v) FROM test_agg WHERE k = 5;
0101
0102
0103 SELECT udf(k), every(v), udf(some(v)), any(v) FROM test_agg GROUP BY udf(k);
0104
0105
0106 SELECT udf(k), every(v) FROM test_agg GROUP BY k HAVING every(v) = false;
0107 SELECT udf(k), udf(every(v)) FROM test_agg GROUP BY udf(k) HAVING every(v) IS NULL;
0108
0109
0110 SELECT udf(k),
0111 udf(Every(v)) AS every
0112 FROM test_agg
0113 WHERE k = 2
0114 AND v IN (SELECT Any(v)
0115 FROM test_agg
0116 WHERE k = 1)
0117 GROUP BY udf(k);
0118
0119
0120 SELECT udf(udf(k)),
0121 Every(v) AS every
0122 FROM test_agg
0123 WHERE k = 2
0124 AND v IN (SELECT Every(v)
0125 FROM test_agg
0126 WHERE k = 1)
0127 GROUP BY udf(udf(k));
0128
0129
0130 SELECT every(udf(1));
0131
0132
0133 SELECT some(udf(1S));
0134
0135
0136 SELECT any(udf(1L));
0137
0138
0139 SELECT udf(every("true"));
0140
0141
0142 SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
0143 SELECT k, udf(udf(v)), some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
0144 SELECT udf(udf(k)), v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
0145
0146
0147 SELECT udf(count(*)) FROM test_agg HAVING count(*) > 1L;
0148 SELECT k, udf(max(v)) FROM test_agg GROUP BY k HAVING max(v) = true;
0149
0150
0151 SELECT * FROM (SELECT udf(COUNT(*)) AS cnt FROM test_agg) WHERE cnt > 1L;
0152
0153
0154 SELECT udf(count(*)) FROM test_agg WHERE count(*) > 1L;
0155 SELECT udf(count(*)) FROM test_agg WHERE count(*) + 1L > 1L;
0156 SELECT udf(count(*)) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1;