--
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
--
--
-- AGGREGATES [Part 4]
-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L607-L997
--
-- This test file was converted from postgreSQL/aggregates_part4.sql.
--
-- NOTE: every statement below is intentionally commented out; each group is
-- tagged with the SPARK ticket tracking the PostgreSQL feature it depends on.

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- ordered-set aggregates

-- select p, percentile_cont(p) within group (order by x::float8)
-- from generate_series(1,5) x,
--      (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p)
-- group by p order by p;

-- select p, percentile_cont(p order by p) within group (order by x)  -- error
-- from generate_series(1,5) x,
--      (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p)
-- group by p order by p;

-- select p, sum() within group (order by x::float8)  -- error
-- from generate_series(1,5) x,
--      (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p)
-- group by p order by p;

-- select p, percentile_cont(p,p)  -- error
-- from generate_series(1,5) x,
--      (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p)
-- group by p order by p;

-- select percentile_cont(0.5) within group (order by b) from aggtest;
-- select percentile_cont(0.5) within group (order by b), sum(b) from aggtest;
-- select percentile_cont(0.5) within group (order by thousand) from tenk1;
-- select percentile_disc(0.5) within group (order by thousand) from tenk1;
-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- select rank(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4)) v(x);
-- select cume_dist(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4)) v(x);
-- select percent_rank(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4),(5)) v(x);
-- select dense_rank(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4)) v(x);

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- select percentile_disc(array[0,0.1,0.25,0.5,0.75,0.9,1]) within group (order by thousand)
-- from tenk1;
-- select percentile_cont(array[0,0.25,0.5,0.75,1]) within group (order by thousand)
-- from tenk1;
-- select percentile_disc(array[[null,1,0.5],[0.75,0.25,null]]) within group (order by thousand)
-- from tenk1;
-- select percentile_cont(array[0,1,0.25,0.75,0.5,1,0.3,0.32,0.35,0.38,0.4]) within group (order by x)
-- from generate_series(1,6) x;

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- [SPARK-28382] Array Functions: unnest
-- select ten, mode() within group (order by string4) from tenk1 group by ten;

-- select percentile_disc(array[0.25,0.5,0.75]) within group (order by x)
-- from unnest('{fred,jim,fred,jack,jill,fred,jill,jim,jim,sheila,jim,sheila}'::text[]) u(x);

-- [SPARK-28669] System Information Functions
-- check collation propagates up in suitable cases:
-- select pg_collation_for(percentile_disc(1) within group (order by x collate "POSIX"))
--   from (values ('fred'),('jim')) v(x);

-- test_rank and test_percentile_disc function created by create_aggregate.sql
-- ordered-set aggs created with CREATE AGGREGATE
-- select test_rank(3) within group (order by x)
-- from (values (1),(1),(2),(2),(3),(3),(4)) v(x);
-- select test_percentile_disc(0.5) within group (order by thousand) from tenk1;

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- ordered-set aggs can't use ungrouped vars in direct args:
-- select rank(x) within group (order by x) from generate_series(1,5) x;

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- outer-level agg can't use a grouped arg of a lower level, either:
-- select array(select percentile_disc(a) within group (order by x)
--                from (values (0.3),(0.7)) v(a) group by a)
--   from generate_series(1,5) g(x);

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- agg in the direct args is a grouping violation, too:
--select rank(sum(x)) within group (order by x) from generate_series(1,5) x;

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- hypothetical-set type unification and argument-count failures:
-- select rank(3) within group (order by x) from (values ('fred'),('jim')) v(x);
-- select rank(3) within group (order by stringu1,stringu2) from tenk1;
-- select rank('fred') within group (order by x) from generate_series(1,5) x;
-- select rank('adam'::text collate "C") within group (order by x collate "POSIX")
--   from (values ('fred'),('jim')) v(x);
-- hypothetical-set type unification successes:
-- select rank('adam'::varchar) within group (order by x) from (values ('fred'),('jim')) v(x);
-- select rank('3') within group (order by x) from generate_series(1,5) x;

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- divide by zero check
-- select percent_rank(0) within group (order by x) from generate_series(1,0) x;

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- deparse and multiple features:
-- create view aggordview1 as
-- select ten,
--        percentile_disc(0.5) within group (order by thousand) as p50,
--        percentile_disc(0.5) within group (order by thousand) filter (where hundred=1) as px,
--        rank(5,'AZZZZ',50) within group (order by hundred, string4 desc, hundred)
--   from tenk1
--  group by ten order by ten;

-- select pg_get_viewdef('aggordview1');
-- select * from aggordview1 order by ten;
-- drop view aggordview1;

-- least_agg created by create_aggregate.sql
-- variadic aggregates
-- select least_agg(q1,q2) from int8_tbl;
-- select least_agg(variadic array[q1,q2]) from int8_tbl;


-- Skip these tests because we do not support create type
-- test aggregates with common transition functions share the same states
-- begin work;

-- create type avg_state as (total bigint, count bigint);

-- create or replace function avg_transfn(state avg_state, n int) returns avg_state as
-- $$
-- declare new_state avg_state;
-- begin
--      raise notice 'avg_transfn called with %', n;
--      if state is null then
--              if n is not null then
--                      new_state.total := n;
--                      new_state.count := 1;
--                      return new_state;
--              end if;
--              return null;
--      elsif n is not null then
--              state.total := state.total + n;
--              state.count := state.count + 1;
--              return state;
--      end if;
--
--      return null;
-- end
-- $$ language plpgsql;

-- create function avg_finalfn(state avg_state) returns int4 as
-- $$
-- begin
--      if state is null then
--              return NULL;
--      else
--              return state.total / state.count;
--      end if;
-- end
-- $$ language plpgsql;

-- create function sum_finalfn(state avg_state) returns int4 as
-- $$
-- begin
--      if state is null then
--              return NULL;
--      else
--              return state.total;
--      end if;
-- end
-- $$ language plpgsql;

-- create aggregate my_avg(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = avg_finalfn
-- );
--
-- create aggregate my_sum(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = sum_finalfn
-- );

-- aggregate state should be shared as aggs are the same.
-- select my_avg(one),my_avg(one) from (values(1),(3)) t(one);

-- aggregate state should be shared as transfn is the same for both aggs.
-- select my_avg(one),my_sum(one) from (values(1),(3)) t(one);

-- same as previous one, but with DISTINCT, which requires sorting the input.
-- select my_avg(distinct one),my_sum(distinct one) from (values(1),(3),(1)) t(one);

-- shouldn't share states due to the distinctness not matching.
-- select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one);

-- shouldn't share states due to the filter clause not matching.
-- select my_avg(one) filter (where one > 1),my_sum(one) from (values(1),(3)) t(one);

-- this should not share the state due to different input columns.
-- select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);

-- [SPARK-27980] Ordered-Set Aggregate Functions
-- exercise cases where OSAs share state
-- select
--   percentile_cont(0.5) within group (order by a),
--   percentile_disc(0.5) within group (order by a)
-- from (values(1::float8),(3),(5),(7)) t(a);

-- select
--   percentile_cont(0.25) within group (order by a),
--   percentile_disc(0.5) within group (order by a)
-- from (values(1::float8),(3),(5),(7)) t(a);

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- these can't share state currently
-- select
--   rank(4) within group (order by a),
--   dense_rank(4) within group (order by a)
-- from (values(1),(3),(5),(7)) t(a);

-- test that aggs with the same sfunc and initcond share the same agg state
-- create aggregate my_sum_init(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = sum_finalfn,
--    initcond = '(10,0)'
-- );

-- create aggregate my_avg_init(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = avg_finalfn,
--    initcond = '(10,0)'
-- );

-- create aggregate my_avg_init2(int4)
-- (
--    stype = avg_state,
--    sfunc = avg_transfn,
--    finalfunc = avg_finalfn,
--    initcond = '(4,0)'
-- );

-- state should be shared if INITCONDs are matching
-- select my_sum_init(one),my_avg_init(one) from (values(1),(3)) t(one);

-- Varying INITCONDs should cause the states not to be shared.
-- select my_sum_init(one),my_avg_init2(one) from (values(1),(3)) t(one);

-- rollback;

-- test aggregate state sharing to ensure it works if one aggregate has a
-- finalfn and the other one has none.
-- begin work;

-- create or replace function sum_transfn(state int4, n int4) returns int4 as
-- $$
-- declare new_state int4;
-- begin
--      raise notice 'sum_transfn called with %', n;
--      if state is null then
--              if n is not null then
--                      new_state := n;
--                      return new_state;
--              end if;
--              return null;
--      elsif n is not null then
--              state := state + n;
--              return state;
--      end if;
--
--      return null;
-- end
-- $$ language plpgsql;

-- create function halfsum_finalfn(state int4) returns int4 as
-- $$
-- begin
--      if state is null then
--              return NULL;
--      else
--              return state / 2;
--      end if;
-- end
-- $$ language plpgsql;

-- create aggregate my_sum(int4)
-- (
--    stype = int4,
--    sfunc = sum_transfn
-- );

-- create aggregate my_half_sum(int4)
-- (
--    stype = int4,
--    sfunc = sum_transfn,
--    finalfunc = halfsum_finalfn
-- );

-- Agg state should be shared even though my_sum has no finalfn
-- select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);

-- rollback;


-- test that the aggregate transition logic correctly handles
-- transition / combine functions returning NULL

-- First test the case of a normal transition function returning NULL
-- BEGIN;
-- CREATE FUNCTION balkifnull(int8, int4)
-- RETURNS int8
-- STRICT
-- LANGUAGE plpgsql AS $$
-- BEGIN
--     IF $1 IS NULL THEN
--        RAISE 'erroneously called with NULL argument';
--     END IF;
--     RETURN NULL;
-- END$$;

-- CREATE AGGREGATE balk(int4)
-- (
--     SFUNC = balkifnull(int8, int4),
--     STYPE = int8,
--     PARALLEL = SAFE,
--     INITCOND = '0'
-- );

-- SELECT balk(hundred) FROM tenk1;

-- ROLLBACK;

-- Secondly test the case of a parallel aggregate combiner function
-- returning NULL. For that use normal transition function, but a
-- combiner function returning NULL.
-- BEGIN ISOLATION LEVEL REPEATABLE READ;
-- CREATE FUNCTION balkifnull(int8, int8)
-- RETURNS int8
-- PARALLEL SAFE
-- STRICT
-- LANGUAGE plpgsql AS $$
-- BEGIN
--     IF $1 IS NULL THEN
--        RAISE 'erroneously called with NULL argument';
--     END IF;
--     RETURN NULL;
-- END$$;

-- CREATE AGGREGATE balk(int4)
-- (
--     SFUNC = int4_sum(int8, int4),
--     STYPE = int8,
--     COMBINEFUNC = balkifnull(int8, int8),
--     PARALLEL = SAFE,
--     INITCOND = '0'
-- );

-- force use of parallelism
-- ALTER TABLE tenk1 set (parallel_workers = 4);
-- SET LOCAL parallel_setup_cost=0;
-- SET LOCAL max_parallel_workers_per_gather=4;

-- EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1;
-- SELECT balk(hundred) FROM tenk1;

-- ROLLBACK;

-- test coverage for aggregate combine/serial/deserial functions
-- BEGIN ISOLATION LEVEL REPEATABLE READ;

-- SET parallel_setup_cost = 0;
-- SET parallel_tuple_cost = 0;
-- SET min_parallel_table_scan_size = 0;
-- SET max_parallel_workers_per_gather = 4;
-- SET enable_indexonlyscan = off;

-- [SPARK-28663] Aggregate Functions for Statistics
-- variance(int4) covers numeric_poly_combine
-- sum(int8) covers int8_avg_combine
-- regr_count(float8, float8) covers int8inc_float8_float8 and aggregates with > 1 arg
-- EXPLAIN (COSTS OFF, VERBOSE)
--   SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1;

-- SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1;

-- ROLLBACK;

-- [SPARK-28661] Hypothetical-Set Aggregate Functions
-- test coverage for dense_rank
-- SELECT dense_rank(x) WITHIN GROUP (ORDER BY x) FROM (VALUES (1),(1),(2),(2),(3),(3)) v(x) GROUP BY (x) ORDER BY 1;


-- [SPARK-28664] ORDER BY in aggregate function
-- Ensure that the STRICT checks for aggregates does not take NULLness
-- of ORDER BY columns into account. See bug report around
-- 2a505161-2727-2473-7c46-591ed108ac52@email.cz
-- SELECT min(x ORDER BY y) FROM (VALUES(1, NULL)) AS d(x,y);
-- SELECT min(x ORDER BY y) FROM (VALUES(1, 2)) AS d(x,y);

-- [SPARK-28382] Array Functions: unnest
-- check collation-sensitive matching between grouping expressions
-- select v||'a', case v||'a' when 'aa' then 1 else 0 end, count(*)
--   from unnest(array['a','b']) u(v)
--  group by v||'a' order by 1;
-- select v||'a', case when v||'a' = 'aa' then 1 else 0 end, count(*)
--   from unnest(array['a','b']) u(v)
--  group by v||'a' order by 1;

-- Make sure that generation of HashAggregate for uniqification purposes
-- does not lead to array overflow due to unexpected duplicate hash keys
-- see CAFeeJoKKu0u+A_A9R9316djW-YW3-+Gtgvy3ju655qRHR3jtdA@mail.gmail.com
-- explain (costs off)
--   select 1 from tenk1
--    where (hundred, thousand) in (select twothousand, twothousand from onek);