-- (removed non-SQL code-browser navigation residue: "Back to home page", "OSCL-LXR")

--
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
--
--
-- AGGREGATES [Part 4]
-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L607-L997

-- Test aggregate operator with codegen on and off.
--CONFIG_DIM1 spark.sql.codegen.wholeStage=true
--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- ordered-set aggregates

-- select p, percentile_cont(p) within group (order by x::float8)
-- from generate_series(1,5) x,
--      (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p)
-- group by p order by p;

-- select p, percentile_cont(p order by p) within group (order by x)  -- error
-- from generate_series(1,5) x,
--      (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p)
-- group by p order by p;

-- select p, sum() within group (order by x::float8)  -- error
-- from generate_series(1,5) x,
--      (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p)
-- group by p order by p;

-- select p, percentile_cont(p,p)  -- error
-- from generate_series(1,5) x,
--      (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p)
-- group by p order by p;

-- select percentile_cont(0.5) within group (order by b) from aggtest;
-- select percentile_cont(0.5) within group (order by b), sum(b) from aggtest;
-- select percentile_cont(0.5) within group (order by thousand) from tenk1;
-- select percentile_disc(0.5) within group (order by thousand) from tenk1;
-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- select rank(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4)) v(x);
-- select cume_dist(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4)) v(x);
-- select percent_rank(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4),(5)) v(x);
-- select dense_rank(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4)) v(x);

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- select percentile_disc(array[0,0.1,0.25,0.5,0.75,0.9,1]) within group (order by thousand)
-- from tenk1;
-- select percentile_cont(array[0,0.25,0.5,0.75,1]) within group (order by thousand)
-- from tenk1;
-- select percentile_disc(array[[null,1,0.5],[0.75,0.25,null]]) within group (order by thousand)
-- from tenk1;
-- select percentile_cont(array[0,1,0.25,0.75,0.5,1,0.3,0.32,0.35,0.38,0.4]) within group (order by x)
-- from generate_series(1,6) x;

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- [SPARK-28382] Array Functions: unnest
-- select ten, mode() within group (order by string4) from tenk1 group by ten;

-- select percentile_disc(array[0.25,0.5,0.75]) within group (order by x)
-- from unnest('{fred,jim,fred,jack,jill,fred,jill,jim,jim,sheila,jim,sheila}'::text[]) u(x);

-- [SPARK-28669] System Information Functions
-- check collation propagates up in suitable cases:
-- select pg_collation_for(percentile_disc(1) within group (order by x collate "POSIX"))
--   from (values ('fred'),('jim')) v(x);

-- test_rank and test_percentile_disc function created by create_aggregate.sql
-- ordered-set aggs created with CREATE AGGREGATE
-- select test_rank(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4)) v(x);
-- select test_percentile_disc(0.5) within group (order by thousand) from tenk1;

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- ordered-set aggs can't use ungrouped vars in direct args:
-- select rank(x) within group (order by x) from generate_series(1,5) x;

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- outer-level agg can't use a grouped arg of a lower level, either:
-- select array(select percentile_disc(a) within group (order by x)
--                from (values (0.3),(0.7)) v(a) group by a)
--   from generate_series(1,5) g(x);

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- agg in the direct args is a grouping violation, too:
--select rank(sum(x)) within group (order by x) from generate_series(1,5) x;

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- hypothetical-set type unification and argument-count failures:
-- select rank(3) within group (order by x) from (values ('fred'),('jim')) v(x);
-- select rank(3) within group (order by stringu1,stringu2) from tenk1;
-- select rank('fred') within group (order by x) from generate_series(1,5) x;
-- select rank('adam'::text collate "C") within group (order by x collate "POSIX")
--   from (values ('fred'),('jim')) v(x);
-- hypothetical-set type unification successes:
-- select rank('adam'::varchar) within group (order by x) from (values ('fred'),('jim')) v(x);
-- select rank('3') within group (order by x) from generate_series(1,5) x;

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- divide by zero check
-- select percent_rank(0) within group (order by x) from generate_series(1,0) x;

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- deparse and multiple features:
-- create view aggordview1 as
-- select ten,
--        percentile_disc(0.5) within group (order by thousand) as p50,
--        percentile_disc(0.5) within group (order by thousand) filter (where hundred=1) as px,
--        rank(5,'AZZZZ',50) within group (order by hundred, string4 desc, hundred)
--   from tenk1
--  group by ten order by ten;

-- select pg_get_viewdef('aggordview1');
-- select * from aggordview1 order by ten;
-- drop view aggordview1;

-- least_agg created by create_aggregate.sql
-- variadic aggregates
-- select least_agg(q1,q2) from int8_tbl;
-- select least_agg(variadic array[q1,q2]) from int8_tbl;


-- Skip these tests because we do not support create type
-- test aggregates with common transition functions share the same states
-- begin work;

-- create type avg_state as (total bigint, count bigint);

-- create or replace function avg_transfn(state avg_state, n int) returns avg_state as
-- $$
-- declare new_state avg_state;
-- begin
--      raise notice 'avg_transfn called with %', n;
--      if state is null then
--              if n is not null then
--                      new_state.total := n;
--                      new_state.count := 1;
--                      return new_state;
--              end if;
--              return null;
--      elsif n is not null then
--              state.total := state.total + n;
--              state.count := state.count + 1;
--              return state;
--      end if;
--
--      return null;
-- end
-- $$ language plpgsql;

-- create function avg_finalfn(state avg_state) returns int4 as
-- $$
-- begin
--      if state is null then
--              return NULL;
--      else
--              return state.total / state.count;
--      end if;
-- end
-- $$ language plpgsql;

-- create function sum_finalfn(state avg_state) returns int4 as
-- $$
-- begin
--      if state is null then
--              return NULL;
--      else
--              return state.total;
--      end if;
-- end
-- $$ language plpgsql;

-- create aggregate my_avg(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = avg_finalfn
-- );
--
-- create aggregate my_sum(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = sum_finalfn
-- );

-- aggregate state should be shared as aggs are the same.
-- select my_avg(one),my_avg(one) from (values(1),(3)) t(one);

-- aggregate state should be shared as transfn is the same for both aggs.
-- select my_avg(one),my_sum(one) from (values(1),(3)) t(one);

-- same as previous one, but with DISTINCT, which requires sorting the input.
-- select my_avg(distinct one),my_sum(distinct one) from (values(1),(3),(1)) t(one);

-- shouldn't share states due to the distinctness not matching.
-- select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one);

-- shouldn't share states due to the filter clause not matching.
-- select my_avg(one) filter (where one > 1),my_sum(one) from (values(1),(3)) t(one);

-- this should not share the state due to different input columns.
-- select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- exercise cases where OSAs share state
-- select
--   percentile_cont(0.5) within group (order by a),
--   percentile_disc(0.5) within group (order by a)
-- from (values(1::float8),(3),(5),(7)) t(a);

-- select
--   percentile_cont(0.25) within group (order by a),
--   percentile_disc(0.5) within group (order by a)
-- from (values(1::float8),(3),(5),(7)) t(a);

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- these can't share state currently
-- select
--   rank(4) within group (order by a),
--   dense_rank(4) within group (order by a)
-- from (values(1),(3),(5),(7)) t(a);

-- test that aggs with the same sfunc and initcond share the same agg state
-- create aggregate my_sum_init(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = sum_finalfn,
--    initcond = '(10,0)'
-- );

-- create aggregate my_avg_init(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = avg_finalfn,
--    initcond = '(10,0)'
-- );

-- create aggregate my_avg_init2(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = avg_finalfn,
--    initcond = '(4,0)'
-- );

-- state should be shared if INITCONDs are matching
-- select my_sum_init(one),my_avg_init(one) from (values(1),(3)) t(one);

-- Varying INITCONDs should cause the states not to be shared.
-- select my_sum_init(one),my_avg_init2(one) from (values(1),(3)) t(one);

-- rollback;

-- test aggregate state sharing to ensure it works if one aggregate has a
-- finalfn and the other one has none.
-- begin work;

-- create or replace function sum_transfn(state int4, n int4) returns int4 as
-- $$
-- declare new_state int4;
-- begin
--      raise notice 'sum_transfn called with %', n;
--      if state is null then
--              if n is not null then
--                      new_state := n;
--                      return new_state;
--              end if;
--              return null;
--      elsif n is not null then
--              state := state + n;
--              return state;
--      end if;
--
--      return null;
-- end
-- $$ language plpgsql;

-- create function halfsum_finalfn(state int4) returns int4 as
-- $$
-- begin
--      if state is null then
--              return NULL;
--      else
--              return state / 2;
--      end if;
-- end
-- $$ language plpgsql;

-- create aggregate my_sum(int4)
-- (
--    stype = int4,
--    sfunc = sum_transfn
-- );

-- create aggregate my_half_sum(int4)
-- (
--    stype = int4,
--    sfunc = sum_transfn,
--    finalfunc = halfsum_finalfn
-- );

-- Agg state should be shared even though my_sum has no finalfn
-- select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);

-- rollback;


-- test that the aggregate transition logic correctly handles
-- transition / combine functions returning NULL

-- First test the case of a normal transition function returning NULL
-- BEGIN;
-- CREATE FUNCTION balkifnull(int8, int4)
-- RETURNS int8
-- STRICT
-- LANGUAGE plpgsql AS $$
-- BEGIN
--     IF $1 IS NULL THEN
--        RAISE 'erroneously called with NULL argument';
--     END IF;
--     RETURN NULL;
-- END$$;

-- CREATE AGGREGATE balk(int4)
-- (
--     SFUNC = balkifnull(int8, int4),
--     STYPE = int8,
--     PARALLEL = SAFE,
--     INITCOND = '0'
-- );

-- SELECT balk(hundred) FROM tenk1;

-- ROLLBACK;

-- Secondly test the case of a parallel aggregate combiner function
-- returning NULL. For that use normal transition function, but a
-- combiner function returning NULL.
-- BEGIN ISOLATION LEVEL REPEATABLE READ;
-- CREATE FUNCTION balkifnull(int8, int8)
-- RETURNS int8
-- PARALLEL SAFE
-- STRICT
-- LANGUAGE plpgsql AS $$
-- BEGIN
--     IF $1 IS NULL THEN
--        RAISE 'erroneously called with NULL argument';
--     END IF;
--     RETURN NULL;
-- END$$;

-- CREATE AGGREGATE balk(int4)
-- (
--     SFUNC = int4_sum(int8, int4),
--     STYPE = int8,
--     COMBINEFUNC = balkifnull(int8, int8),
--     PARALLEL = SAFE,
--     INITCOND = '0'
-- );

-- force use of parallelism
-- ALTER TABLE tenk1 set (parallel_workers = 4);
-- SET LOCAL parallel_setup_cost=0;
-- SET LOCAL max_parallel_workers_per_gather=4;

-- EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1;
-- SELECT balk(hundred) FROM tenk1;

-- ROLLBACK;

-- test coverage for aggregate combine/serial/deserial functions
-- BEGIN ISOLATION LEVEL REPEATABLE READ;

-- SET parallel_setup_cost = 0;
-- SET parallel_tuple_cost = 0;
-- SET min_parallel_table_scan_size = 0;
-- SET max_parallel_workers_per_gather = 4;
-- SET enable_indexonlyscan = off;

-- [SPARK-28663] Aggregate Functions for Statistics
-- variance(int4) covers numeric_poly_combine
-- sum(int8) covers int8_avg_combine
-- regr_count(float8, float8) covers int8inc_float8_float8 and aggregates with > 1 arg
-- EXPLAIN (COSTS OFF, VERBOSE)
--   SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1;

-- SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1;

-- ROLLBACK;

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- test coverage for dense_rank
-- SELECT dense_rank(x) WITHIN GROUP (ORDER BY x) FROM (VALUES (1),(1),(2),(2),(3),(3)) v(x) GROUP BY (x) ORDER BY 1;


-- [SPARK-28664] ORDER BY in aggregate function
-- Ensure that the STRICT checks for aggregates does not take NULLness
-- of ORDER BY columns into account. See bug report around
-- 2a505161-2727-2473-7c46-591ed108ac52@email.cz
-- SELECT min(x ORDER BY y) FROM (VALUES(1, NULL)) AS d(x,y);
-- SELECT min(x ORDER BY y) FROM (VALUES(1, 2)) AS d(x,y);

-- [SPARK-28382] Array Functions: unnest
-- check collation-sensitive matching between grouping expressions
-- select v||'a', case v||'a' when 'aa' then 1 else 0 end, count(*)
--   from unnest(array['a','b']) u(v)
--  group by v||'a' order by 1;
-- select v||'a', case when v||'a' = 'aa' then 1 else 0 end, count(*)
--   from unnest(array['a','b']) u(v)
--  group by v||'a' order by 1;

-- Make sure that generation of HashAggregate for uniqification purposes
-- does not lead to array overflow due to unexpected duplicate hash keys
-- see CAFeeJoKKu0u+A_A9R9316djW-YW3-+Gtgvy3ju655qRHR3jtdA@mail.gmail.com
-- explain (costs off)
--   select 1 from tenk1
--    where (hundred, thousand) in (select twothousand, twothousand from onek);