|
||||
--
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
--
--
-- AGGREGATES [Part 4]
-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L607-L997

-- Test aggregate operator with codegen on and off.
--CONFIG_DIM1 spark.sql.codegen.wholeStage=true
--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- ordered-set aggregates

-- select p, percentile_cont(p) within group (order by x::float8)
-- from generate_series(1,5) x,
--      (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p)
-- group by p order by p;

-- select p, percentile_cont(p order by p) within group (order by x)  -- error
-- from generate_series(1,5) x,
--      (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p)
-- group by p order by p;

-- select p, sum() within group (order by x::float8)  -- error
-- from generate_series(1,5) x,
--      (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p)
-- group by p order by p;

-- select p, percentile_cont(p,p)  -- error
-- from generate_series(1,5) x,
--      (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p)
-- group by p order by p;

-- select percentile_cont(0.5) within group (order by b) from aggtest;
-- select percentile_cont(0.5) within group (order by b), sum(b) from aggtest;
-- select percentile_cont(0.5) within group (order by thousand) from tenk1;
-- select percentile_disc(0.5) within group (order by thousand) from tenk1;
-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- select rank(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4)) v(x);
-- select cume_dist(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4)) v(x);
-- select percent_rank(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4),(5)) v(x);
-- select dense_rank(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4)) v(x);

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- select percentile_disc(array[0,0.1,0.25,0.5,0.75,0.9,1]) within group (order by thousand)
-- from tenk1;
-- select percentile_cont(array[0,0.25,0.5,0.75,1]) within group (order by thousand)
-- from tenk1;
-- select percentile_disc(array[[null,1,0.5],[0.75,0.25,null]]) within group (order by thousand)
-- from tenk1;
-- select percentile_cont(array[0,1,0.25,0.75,0.5,1,0.3,0.32,0.35,0.38,0.4]) within group (order by x)
-- from generate_series(1,6) x;

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- [SPARK-28382] Array Functions: unnest
-- select ten, mode() within group (order by string4) from tenk1 group by ten;

-- select percentile_disc(array[0.25,0.5,0.75]) within group (order by x)
-- from unnest('{fred,jim,fred,jack,jill,fred,jill,jim,jim,sheila,jim,sheila}'::text[]) u(x);

-- [SPARK-28669] System Information Functions
-- check collation propagates up in suitable cases:
-- select pg_collation_for(percentile_disc(1) within group (order by x collate "POSIX"))
-- from (values ('fred'),('jim')) v(x);

-- test_rank and test_percentile_disc function created by create_aggregate.sql
-- ordered-set aggs created with CREATE AGGREGATE
-- select test_rank(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4)) v(x);
-- select test_percentile_disc(0.5) within group (order by thousand) from tenk1;

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- ordered-set aggs can't use ungrouped vars in direct args:
-- select rank(x) within group (order by x) from generate_series(1,5) x;

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- outer-level agg can't use a grouped arg of a lower level, either:
-- select array(select percentile_disc(a) within group (order by x)
--              from (values (0.3),(0.7)) v(a) group by a)
-- from generate_series(1,5) g(x);

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- agg in the direct args is a grouping violation, too:
--select rank(sum(x)) within group (order by x) from generate_series(1,5) x;

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- hypothetical-set type unification and argument-count failures:
-- select rank(3) within group (order by x) from (values ('fred'),('jim')) v(x);
-- select rank(3) within group (order by stringu1,stringu2) from tenk1;
-- select rank('fred') within group (order by x) from generate_series(1,5) x;
-- select rank('adam'::text collate "C") within group (order by x collate "POSIX")
--   from (values ('fred'),('jim')) v(x);
-- hypothetical-set type unification successes:
-- select rank('adam'::varchar) within group (order by x) from (values ('fred'),('jim')) v(x);
-- select rank('3') within group (order by x) from generate_series(1,5) x;

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- divide by zero check
-- select percent_rank(0) within group (order by x) from generate_series(1,0) x;

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- deparse and multiple features:
-- create view aggordview1 as
-- select ten,
--        percentile_disc(0.5) within group (order by thousand) as p50,
--        percentile_disc(0.5) within group (order by thousand) filter (where hundred=1) as px,
--        rank(5,'AZZZZ',50) within group (order by hundred, string4 desc, hundred)
--   from tenk1
--  group by ten order by ten;

-- select pg_get_viewdef('aggordview1');
-- select * from aggordview1 order by ten;
-- drop view aggordview1;

-- least_agg created by create_aggregate.sql
-- variadic aggregates
-- select least_agg(q1,q2) from int8_tbl;
-- select least_agg(variadic array[q1,q2]) from int8_tbl;


-- Skip these tests because we do not support create type
-- test aggregates with common transition functions share the same states
-- begin work;

-- create type avg_state as (total bigint, count bigint);

-- create or replace function avg_transfn(state avg_state, n int) returns avg_state as
-- $$
-- declare new_state avg_state;
-- begin
--   raise notice 'avg_transfn called with %', n;
--   if state is null then
--     if n is not null then
--       new_state.total := n;
--       new_state.count := 1;
--       return new_state;
--     end if;
--     return null;
--   elsif n is not null then
--     state.total := state.total + n;
--     state.count := state.count + 1;
--     return state;
--   end if;
--
--   return null;
-- end
-- $$ language plpgsql;

-- create function avg_finalfn(state avg_state) returns int4 as
-- $$
-- begin
--   if state is null then
--     return NULL;
--   else
--     return state.total / state.count;
--   end if;
-- end
-- $$ language plpgsql;

-- create function sum_finalfn(state avg_state) returns int4 as
-- $$
-- begin
--   if state is null then
--     return NULL;
--   else
--     return state.total;
--   end if;
-- end
-- $$ language plpgsql;

-- create aggregate my_avg(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = avg_finalfn
-- );
--
-- create aggregate my_sum(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = sum_finalfn
-- );

-- aggregate state should be shared as aggs are the same.
-- select my_avg(one),my_avg(one) from (values(1),(3)) t(one);

-- aggregate state should be shared as transfn is the same for both aggs.
-- select my_avg(one),my_sum(one) from (values(1),(3)) t(one);

-- same as previous one, but with DISTINCT, which requires sorting the input.
-- select my_avg(distinct one),my_sum(distinct one) from (values(1),(3),(1)) t(one);

-- shouldn't share states due to the distinctness not matching.
-- select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one);

-- shouldn't share states due to the filter clause not matching.
-- select my_avg(one) filter (where one > 1),my_sum(one) from (values(1),(3)) t(one);

-- this should not share the state due to different input columns.
-- select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- exercise cases where OSAs share state
-- select
--   percentile_cont(0.5) within group (order by a),
--   percentile_disc(0.5) within group (order by a)
-- from (values(1::float8),(3),(5),(7)) t(a);

-- select
--   percentile_cont(0.25) within group (order by a),
--   percentile_disc(0.5) within group (order by a)
-- from (values(1::float8),(3),(5),(7)) t(a);

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- these can't share state currently
-- select
--   rank(4) within group (order by a),
--   dense_rank(4) within group (order by a)
-- from (values(1),(3),(5),(7)) t(a);

-- test that aggs with the same sfunc and initcond share the same agg state
-- create aggregate my_sum_init(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = sum_finalfn,
--    initcond = '(10,0)'
-- );

-- create aggregate my_avg_init(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = avg_finalfn,
--    initcond = '(10,0)'
-- );

-- create aggregate my_avg_init2(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = avg_finalfn,
--    initcond = '(4,0)'
-- );

-- state should be shared if INITCONDs are matching
-- select my_sum_init(one),my_avg_init(one) from (values(1),(3)) t(one);

-- Varying INITCONDs should cause the states not to be shared.
-- select my_sum_init(one),my_avg_init2(one) from (values(1),(3)) t(one);

-- rollback;

-- test aggregate state sharing to ensure it works if one aggregate has a
-- finalfn and the other one has none.
-- begin work;

-- create or replace function sum_transfn(state int4, n int4) returns int4 as
-- $$
-- declare new_state int4;
-- begin
--   raise notice 'sum_transfn called with %', n;
--   if state is null then
--     if n is not null then
--       new_state := n;
--       return new_state;
--     end if;
--     return null;
--   elsif n is not null then
--     state := state + n;
--     return state;
--   end if;
--
--   return null;
-- end
-- $$ language plpgsql;

-- create function halfsum_finalfn(state int4) returns int4 as
-- $$
-- begin
--   if state is null then
--     return NULL;
--   else
--     return state / 2;
--   end if;
-- end
-- $$ language plpgsql;

-- create aggregate my_sum(int4)
-- (
--    stype = int4,
--    sfunc = sum_transfn
-- );

-- create aggregate my_half_sum(int4)
-- (
--    stype = int4,
--    sfunc = sum_transfn,
--    finalfunc = halfsum_finalfn
-- );

-- Agg state should be shared even though my_sum has no finalfn
-- select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);

-- rollback;


-- test that the aggregate transition logic correctly handles
-- transition / combine functions returning NULL

-- First test the case of a normal transition function returning NULL
-- BEGIN;
-- CREATE FUNCTION balkifnull(int8, int4)
-- RETURNS int8
-- STRICT
-- LANGUAGE plpgsql AS $$
-- BEGIN
--     IF $1 IS NULL THEN
--        RAISE 'erroneously called with NULL argument';
--     END IF;
--     RETURN NULL;
-- END$$;

-- CREATE AGGREGATE balk(int4)
-- (
--     SFUNC = balkifnull(int8, int4),
--     STYPE = int8,
--     PARALLEL = SAFE,
--     INITCOND = '0'
-- );

-- SELECT balk(hundred) FROM tenk1;

-- ROLLBACK;

-- Secondly test the case of a parallel aggregate combiner function
-- returning NULL. For that use normal transition function, but a
-- combiner function returning NULL.
-- BEGIN ISOLATION LEVEL REPEATABLE READ;
-- CREATE FUNCTION balkifnull(int8, int8)
-- RETURNS int8
-- PARALLEL SAFE
-- STRICT
-- LANGUAGE plpgsql AS $$
-- BEGIN
--     IF $1 IS NULL THEN
--        RAISE 'erroneously called with NULL argument';
--     END IF;
--     RETURN NULL;
-- END$$;

-- CREATE AGGREGATE balk(int4)
-- (
--     SFUNC = int4_sum(int8, int4),
--     STYPE = int8,
--     COMBINEFUNC = balkifnull(int8, int8),
--     PARALLEL = SAFE,
--     INITCOND = '0'
-- );

-- force use of parallelism
-- ALTER TABLE tenk1 set (parallel_workers = 4);
-- SET LOCAL parallel_setup_cost=0;
-- SET LOCAL max_parallel_workers_per_gather=4;

-- EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1;
-- SELECT balk(hundred) FROM tenk1;

-- ROLLBACK;

-- test coverage for aggregate combine/serial/deserial functions
-- BEGIN ISOLATION LEVEL REPEATABLE READ;

-- SET parallel_setup_cost = 0;
-- SET parallel_tuple_cost = 0;
-- SET min_parallel_table_scan_size = 0;
-- SET max_parallel_workers_per_gather = 4;
-- SET enable_indexonlyscan = off;

-- [SPARK-28663] Aggregate Functions for Statistics
-- variance(int4) covers numeric_poly_combine
-- sum(int8) covers int8_avg_combine
-- regr_count(float8, float8) covers int8inc_float8_float8 and aggregates with > 1 arg
-- EXPLAIN (COSTS OFF, VERBOSE)
-- SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1;

-- SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1;

-- ROLLBACK;

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- test coverage for dense_rank
-- SELECT dense_rank(x) WITHIN GROUP (ORDER BY x) FROM (VALUES (1),(1),(2),(2),(3),(3)) v(x) GROUP BY (x) ORDER BY 1;


-- [SPARK-28664] ORDER BY in aggregate function
-- Ensure that the STRICT checks for aggregates does not take NULLness
-- of ORDER BY columns into account. See bug report around
-- 2a505161-2727-2473-7c46-591ed108ac52@email.cz
-- SELECT min(x ORDER BY y) FROM (VALUES(1, NULL)) AS d(x,y);
-- SELECT min(x ORDER BY y) FROM (VALUES(1, 2)) AS d(x,y);

-- [SPARK-28382] Array Functions: unnest
-- check collation-sensitive matching between grouping expressions
-- select v||'a', case v||'a' when 'aa' then 1 else 0 end, count(*)
--   from unnest(array['a','b']) u(v)
--  group by v||'a' order by 1;
-- select v||'a', case when v||'a' = 'aa' then 1 else 0 end, count(*)
--   from unnest(array['a','b']) u(v)
--  group by v||'a' order by 1;

-- Make sure that generation of HashAggregate for uniqification purposes
-- does not lead to array overflow due to unexpected duplicate hash keys
-- see CAFeeJoKKu0u+A_A9R9316djW-YW3-+Gtgvy3ju655qRHR3jtdA@mail.gmail.com
-- explain (costs off)
-- select 1 from tenk1
-- where (hundred, thousand) in (select twothousand, twothousand from onek);
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.1.0 LXR engine. The LXR team |