Back to home page

OSCL-LXR

 
 

    


0001 -- Test window operator with codegen on and off.
0002 --CONFIG_DIM1 spark.sql.codegen.wholeStage=true
0003 --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
0004 --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN
0005 
0006 -- Test data.
0007 CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
0008 (null, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), "a"),
0009 (1, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), "a"),
0010 (1, 2L, 2.5D, date("2017-08-02"), timestamp(1502000000), "a"),
0011 (2, 2147483650L, 100.001D, date("2020-12-31"), timestamp(1609372800), "a"),
0012 (1, null, 1.0D, date("2017-08-01"), timestamp(1501545600), "b"),
0013 (2, 3L, 3.3D, date("2017-08-03"), timestamp(1503000000), "b"),
0014 (3, 2147483650L, 100.001D, date("2020-12-31"), timestamp(1609372800), "b"),
0015 (null, null, null, null, null, null),
0016 (3, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), null)
0017 AS testData(val, val_long, val_double, val_date, val_timestamp, cate);
0018 
0019 -- RowsBetween
0020 SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData
0021 ORDER BY cate, val;
0022 SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val
0023 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
0024 SELECT val_long, cate, sum(val_long) OVER(PARTITION BY cate ORDER BY val_long
0025 ROWS BETWEEN CURRENT ROW AND 2147483648 FOLLOWING) FROM testData ORDER BY cate, val_long;
0026 
0027 -- RangeBetween
0028 SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val RANGE 1 PRECEDING) FROM testData
0029 ORDER BY cate, val;
0030 SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val
0031 RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
0032 SELECT val_long, cate, sum(val_long) OVER(PARTITION BY cate ORDER BY val_long
0033 RANGE BETWEEN CURRENT ROW AND 2147483648 FOLLOWING) FROM testData ORDER BY cate, val_long;
0034 SELECT val_double, cate, sum(val_double) OVER(PARTITION BY cate ORDER BY val_double
0035 RANGE BETWEEN CURRENT ROW AND 2.5 FOLLOWING) FROM testData ORDER BY cate, val_double;
0036 SELECT val_date, cate, max(val_date) OVER(PARTITION BY cate ORDER BY val_date
0037 RANGE BETWEEN CURRENT ROW AND 2 FOLLOWING) FROM testData ORDER BY cate, val_date;
0038 SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp
0039 RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData
0040 ORDER BY cate, val_timestamp;
0041 
0042 -- RangeBetween with reverse OrderBy
0043 SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val DESC
0044 RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
0045 
0046 -- Invalid window frame
0047 SELECT val, cate, count(val) OVER(PARTITION BY cate
0048 ROWS BETWEEN UNBOUNDED FOLLOWING AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
0049 SELECT val, cate, count(val) OVER(PARTITION BY cate
0050 RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
0051 SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val, cate
0052 RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
0053 SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY current_timestamp
0054 RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
0055 SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val
0056 RANGE BETWEEN 1 FOLLOWING AND 1 PRECEDING) FROM testData ORDER BY cate, val;
0057 SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val
0058 RANGE BETWEEN CURRENT ROW AND current_date PRECEDING) FROM testData ORDER BY cate, val;
0059 
0060 
0061 -- Window functions
0062 SELECT val, cate,
0063 max(val) OVER w AS max,
0064 min(val) OVER w AS min,
0065 min(val) OVER w AS min,
0066 count(val) OVER w AS count,
0067 sum(val) OVER w AS sum,
0068 avg(val) OVER w AS avg,
0069 stddev(val) OVER w AS stddev,
0070 first_value(val) OVER w AS first_value,
0071 first_value(val, true) OVER w AS first_value_ignore_null,
0072 first_value(val, false) OVER w AS first_value_contain_null,
0073 last_value(val) OVER w AS last_value,
0074 last_value(val, true) OVER w AS last_value_ignore_null,
0075 last_value(val, false) OVER w AS last_value_contain_null,
0076 rank() OVER w AS rank,
0077 dense_rank() OVER w AS dense_rank,
0078 cume_dist() OVER w AS cume_dist,
0079 percent_rank() OVER w AS percent_rank,
0080 ntile(2) OVER w AS ntile,
0081 row_number() OVER w AS row_number,
0082 var_pop(val) OVER w AS var_pop,
0083 var_samp(val) OVER w AS var_samp,
0084 approx_count_distinct(val) OVER w AS approx_count_distinct,
0085 covar_pop(val, val_long) OVER w AS covar_pop,
0086 corr(val, val_long) OVER w AS corr,
0087 stddev_samp(val) OVER w AS stddev_samp,
0088 stddev_pop(val) OVER w AS stddev_pop,
0089 collect_list(val) OVER w AS collect_list,
0090 collect_set(val) OVER w AS collect_set,
0091 skewness(val_double) OVER w AS skewness,
0092 kurtosis(val_double) OVER w AS kurtosis
0093 FROM testData
0094 WINDOW w AS (PARTITION BY cate ORDER BY val)
0095 ORDER BY cate, val;
0096 
0097 -- Null inputs
0098 SELECT val, cate, avg(null) OVER(PARTITION BY cate ORDER BY val) FROM testData ORDER BY cate, val;
0099 
0100 -- OrderBy not specified
0101 SELECT val, cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER BY cate, val;
0102 
0103 -- Over clause is empty
0104 SELECT val, cate, sum(val) OVER(), avg(val) OVER() FROM testData ORDER BY cate, val;
0105 
0106 -- first_value()/last_value() over ()
0107 SELECT val, cate,
0108 first_value(false) OVER w AS first_value,
0109 first_value(true, true) OVER w AS first_value_ignore_null,
0110 first_value(false, false) OVER w AS first_value_contain_null,
0111 last_value(false) OVER w AS last_value,
0112 last_value(true, true) OVER w AS last_value_ignore_null,
0113 last_value(false, false) OVER w AS last_value_contain_null
0114 FROM testData
0115 WINDOW w AS ()
0116 ORDER BY cate, val;
0117 
0118 -- parentheses around window reference
0119 SELECT cate, sum(val) OVER (w)
0120 FROM testData
0121 WHERE val is not null
0122 WINDOW w AS (PARTITION BY cate ORDER BY val);
0123 
0124 -- with filter predicate
0125 SELECT val, cate,
0126 count(val) FILTER (WHERE val > 1) OVER(PARTITION BY cate)
0127 FROM testData ORDER BY cate, val;