inputs/postgreSQL/strings.sql

0001 --
0002 -- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
0003 --
0004 -- STRINGS
0005 -- -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/strings.sql
0006 -- Test various data entry syntaxes.
0007 --
0008
0009 -- SQL string continuation syntax
0010 -- E021-03 character string literals
0011 SELECT 'first line'
0012 ' - next line'
0013         ' - third line'
0014         AS `Three lines to one`;
0015
0016 -- Spark SQL support this string continuation syntax
0017 -- illegal string continuation syntax
0018 SELECT 'first line'
0019 ' - next line' /* this comment is not allowed here */
0020 ' - third line'
0021         AS `Illegal comment within continuation`;
0022
0023 -- [SPARK-28447] ANSI SQL: Unicode escapes in literals
0024 -- Unicode escapes
0025 -- SET standard_conforming_strings TO on;
0026
0027 -- SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";
0028 -- SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*';
0029
0030 -- SELECT U&' \' UESCAPE '!' AS "tricky";
0031 -- SELECT 'tricky' AS U&"\" UESCAPE '!';
0032
0033 -- SELECT U&'wrong: \061';
0034 -- SELECT U&'wrong: \+0061';
0035 -- SELECT U&'wrong: +0061' UESCAPE '+';
0036
0037 -- SET standard_conforming_strings TO off;
0038
0039 -- SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";
0040 -- SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*';
0041
0042 -- SELECT U&' \' UESCAPE '!' AS "tricky";
0043 -- SELECT 'tricky' AS U&"\" UESCAPE '!';
0044
0045 -- SELECT U&'wrong: \061';
0046 -- SELECT U&'wrong: \+0061';
0047 -- SELECT U&'wrong: +0061' UESCAPE '+';
0048
0049 -- RESET standard_conforming_strings;
0050
0051 -- Spark SQL only support escape mode
0052 -- bytea
0053 -- SET bytea_output TO hex;
0054 -- SELECT E'\\xDeAdBeEf'::bytea;
0055 -- SELECT E'\\x De Ad Be Ef '::bytea;
0056 -- SELECT E'\\xDeAdBeE'::bytea;
0057 -- SELECT E'\\xDeAdBeEx'::bytea;
0058 -- SELECT E'\\xDe00BeEf'::bytea;
0059 -- SELECT E'DeAdBeEf'::bytea;
0060 -- SELECT E'De\\000dBeEf'::bytea;
0061 -- SELECT E'De\123dBeEf'::bytea;
0062 -- SELECT E'De\\123dBeEf'::bytea;
0063 -- SELECT E'De\\678dBeEf'::bytea;
0064
0065 -- SET bytea_output TO escape;
0066 SELECT binary('\\xDeAdBeEf');
0067 SELECT binary('\\x De Ad Be Ef ');
0068 SELECT binary('\\xDe00BeEf');
0069 SELECT binary('DeAdBeEf');
0070 SELECT binary('De\\000dBeEf');
0071 SELECT binary('De\\123dBeEf');
0072
0073 -- Skip these tests because we do not have char/varchar type
0074 --
0075 -- test conversions between various string types
0076 -- E021-10 implicit casting among the character data types
0077 --
0078
0079 -- SELECT CAST(f1 AS text) AS "text(char)" FROM CHAR_TBL;
0080
0081 -- SELECT CAST(f1 AS text) AS "text(varchar)" FROM VARCHAR_TBL;
0082
0083 -- SELECT CAST(name 'namefield' AS text) AS "text(name)";
0084
0085 -- since this is an explicit cast, it should truncate w/o error:
0086 -- SELECT CAST(f1 AS char(10)) AS "char(text)" FROM TEXT_TBL;
0087 -- note: implicit-cast case is tested in char.sql
0088
0089 -- SELECT CAST(f1 AS char(20)) AS "char(text)" FROM TEXT_TBL;
0090
0091 -- SELECT CAST(f1 AS char(10)) AS "char(varchar)" FROM VARCHAR_TBL;
0092
0093 -- SELECT CAST(name 'namefield' AS char(10)) AS "char(name)";
0094
0095 -- SELECT CAST(f1 AS varchar) AS "varchar(text)" FROM TEXT_TBL;
0096
0097 -- SELECT CAST(f1 AS varchar) AS "varchar(char)" FROM CHAR_TBL;
0098
0099 -- SELECT CAST(name 'namefield' AS varchar) AS "varchar(name)";
0100
0101 --
0102 -- test SQL string functions
0103 -- E### and T### are feature reference numbers from SQL99
0104 --
0105
0106 -- E021-09 trim function
0107 SELECT TRIM(BOTH FROM '  bunch o blanks  ') = 'bunch o blanks' AS `bunch o blanks`;
0108
0109 SELECT TRIM(LEADING FROM '  bunch o blanks  ') = 'bunch o blanks  ' AS `bunch o blanks  `;
0110
0111 SELECT TRIM(TRAILING FROM '  bunch o blanks  ') = '  bunch o blanks' AS `  bunch o blanks`;
0112
0113 SELECT TRIM(BOTH 'x' FROM 'xxxxxsome Xsxxxxx') = 'some Xs' AS `some Xs`;
0114
0115 -- E021-06 substring expression
0116 SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS `34567890`;
0117
0118 SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS `456`;
0119
0120 -- [SPARK-28076] Support regular expression substring
0121 -- T581 regular expression substring (with SQL's bizarre regexp syntax)
0122 -- SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
0123
0124 -- No match should return NULL
0125 -- SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
0126
0127 -- Null inputs should return NULL
0128 -- SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True";
0129 -- SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True";
0130 -- SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
0131
0132 -- The first and last parts should act non-greedy
0133 -- SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef";
0134 -- SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg";
0135
0136 -- Vertical bar in any part affects only that part
0137 -- SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef";
0138 -- SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef";
0139 -- SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef";
0140
0141 -- Can't have more than two part separators
0142 -- SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error";
0143
0144 -- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
0145 -- SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg";
0146 -- SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg";
0147
0148 -- substring() with just two arguments is not allowed by SQL spec;
0149 -- we accept it, but we interpret the pattern as a POSIX regexp not SQL
0150 -- SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
0151
0152 -- With a parenthesized subexpression, return only what matches the subexpr
0153 -- SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
0154
0155 -- [SPARK-27952] String Functions: regexp_replace is not compatible
0156 -- PostgreSQL extension to allow using back reference in replace string;
0157 -- SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
0158 -- SELECT regexp_replace('AAA   BBB   CCC   ', E'\\s+', ' ', 'g');
0159 -- SELECT regexp_replace('AAA', '^|$', 'Z', 'g');
0160 -- SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
0161 -- invalid regexp option
0162 -- SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
0163
0164 -- set so we can tell NULL from empty string
0165 -- \pset null '\\N'
0166
0167 -- [SPARK-28078] Add support other 4 REGEXP functions
0168 -- return all matches from regexp
0169 -- SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
0170
0171 -- test case insensitive
0172 -- SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
0173
0174 -- global option - more than one match
0175 -- SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
0176
0177 -- empty capture group (matched empty string)
0178 -- SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);
0179 -- no match
0180 -- SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$);
0181 -- optional capture group did not match, null entry in array
0182 -- SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
0183
0184 -- no capture groups
0185 -- SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
0186
0187 -- start/end-of-line matches are of zero length
0188 -- SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '^', 'mg');
0189 -- SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '$', 'mg');
0190 -- SELECT regexp_matches('1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '^.?', 'mg');
0191 -- SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '.?$', 'mg');
0192 -- SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4', '.?$', 'mg');
0193
0194 -- give me errors
0195 -- SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'gz');
0196 -- SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$);
0197 -- SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$);
0198
0199 -- split string on regexp
0200 -- SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', $re$\s+$re$) AS foo;
0201 -- SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', $re$\s+$re$);
0202
0203 -- SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', $re$\s*$re$) AS foo;
0204 -- SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', $re$\s*$re$);
0205 -- SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', '') AS foo;
0206 -- SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', '');
0207 -- case insensitive
0208 -- SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'i') AS foo;
0209 -- -- SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'i');
0210 -- no match of pattern
0211 -- SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', 'nomatch') AS foo;
0212 -- SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', 'nomatch');
0213 -- some corner cases
0214 -- SELECT regexp_split_to_array('123456','1');
0215 -- SELECT regexp_split_to_array('123456','6');
0216 -- SELECT regexp_split_to_array('123456','.');
0217 -- SELECT regexp_split_to_array('123456','');
0218 -- SELECT regexp_split_to_array('123456','(?:)');
0219 -- SELECT regexp_split_to_array('1','');
0220 -- errors
0221 -- SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
0222 -- SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'iz');
0223 -- global option meaningless for regexp_split
0224 -- SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g') AS foo;
0225 -- SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g');
0226
0227 -- change NULL-display back
0228 -- \pset null ''
0229
0230 -- E021-11 position expression
0231 SELECT POSITION('4' IN '1234567890') = '4' AS `4`;
0232
0233 SELECT POSITION('5' IN '1234567890') = '5' AS `5`;
0234
0235 -- [SPARK-28077] Add support string functions: OVERLAY
0236 -- T312 character overlay function
0237 SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS `abc45f`;
0238
0239 SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5) AS `yabadaba`;
0240
0241 SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0) AS `yabadabadoo`;
0242
0243 SELECT OVERLAY('babosa' PLACING 'ubb' FROM 2 FOR 4) AS `bubba`;
0244
0245 --
0246 -- test LIKE
0247 -- Be sure to form every test as a LIKE/NOT LIKE pair.
0248 --
0249
0250 -- simplest examples
0251 -- E061-04 like predicate
0252 SELECT 'hawkeye' LIKE 'h%' AS `true`;
0253 SELECT 'hawkeye' NOT LIKE 'h%' AS `false`;
0254
0255 SELECT 'hawkeye' LIKE 'H%' AS `false`;
0256 SELECT 'hawkeye' NOT LIKE 'H%' AS `true`;
0257
0258 SELECT 'hawkeye' LIKE 'indio%' AS `false`;
0259 SELECT 'hawkeye' NOT LIKE 'indio%' AS `true`;
0260
0261 SELECT 'hawkeye' LIKE 'h%eye' AS `true`;
0262 SELECT 'hawkeye' NOT LIKE 'h%eye' AS `false`;
0263
0264 SELECT 'indio' LIKE '_ndio' AS `true`;
0265 SELECT 'indio' NOT LIKE '_ndio' AS `false`;
0266
0267 SELECT 'indio' LIKE 'in__o' AS `true`;
0268 SELECT 'indio' NOT LIKE 'in__o' AS `false`;
0269
0270 SELECT 'indio' LIKE 'in_o' AS `false`;
0271 SELECT 'indio' NOT LIKE 'in_o' AS `true`;
0272
0273 -- unused escape character
0274 SELECT 'hawkeye' LIKE 'h%' ESCAPE '#' AS `true`;
0275 SELECT 'hawkeye' NOT LIKE 'h%' ESCAPE '#' AS `false`;
0276
0277 SELECT 'indio' LIKE 'ind_o' ESCAPE '$' AS `true`;
0278 SELECT 'indio' NOT LIKE 'ind_o' ESCAPE '$' AS `false`;
0279
0280 -- escape character
0281 -- E061-05 like predicate with escape clause
0282 SELECT 'h%' LIKE 'h#%' ESCAPE '#' AS `true`;
0283 SELECT 'h%' NOT LIKE 'h#%' ESCAPE '#' AS `false`;
0284
0285 SELECT 'h%wkeye' LIKE 'h#%' ESCAPE '#' AS `false`;
0286 SELECT 'h%wkeye' NOT LIKE 'h#%' ESCAPE '#' AS `true`;
0287
0288 SELECT 'h%wkeye' LIKE 'h#%%' ESCAPE '#' AS `true`;
0289 SELECT 'h%wkeye' NOT LIKE 'h#%%' ESCAPE '#' AS `false`;
0290
0291 SELECT 'h%awkeye' LIKE 'h#%a%k%e' ESCAPE '#' AS `true`;
0292 SELECT 'h%awkeye' NOT LIKE 'h#%a%k%e' ESCAPE '#' AS `false`;
0293
0294 SELECT 'indio' LIKE '_ndio' ESCAPE '$' AS `true`;
0295 SELECT 'indio' NOT LIKE '_ndio' ESCAPE '$' AS `false`;
0296
0297 SELECT 'i_dio' LIKE 'i$_d_o' ESCAPE '$' AS `true`;
0298 SELECT 'i_dio' NOT LIKE 'i$_d_o' ESCAPE '$' AS `false`;
0299
0300 SELECT 'i_dio' LIKE 'i$_nd_o' ESCAPE '$' AS `false`;
0301 SELECT 'i_dio' NOT LIKE 'i$_nd_o' ESCAPE '$' AS `true`;
0302
0303 SELECT 'i_dio' LIKE 'i$_d%o' ESCAPE '$' AS `true`;
0304 SELECT 'i_dio' NOT LIKE 'i$_d%o' ESCAPE '$' AS `false`;
0305
0306 -- escape character same as pattern character
0307 SELECT 'maca' LIKE 'm%aca' ESCAPE '%' AS `true`;
0308 SELECT 'maca' NOT LIKE 'm%aca' ESCAPE '%' AS `false`;
0309
0310 SELECT 'ma%a' LIKE 'm%a%%a' ESCAPE '%' AS `true`;
0311 SELECT 'ma%a' NOT LIKE 'm%a%%a' ESCAPE '%' AS `false`;
0312
0313 SELECT 'bear' LIKE 'b_ear' ESCAPE '_' AS `true`;
0314 SELECT 'bear' NOT LIKE 'b_ear' ESCAPE '_' AS `false`;
0315
0316 SELECT 'be_r' LIKE 'b_e__r' ESCAPE '_' AS `true`;
0317 SELECT 'be_r' NOT LIKE 'b_e__r' ESCAPE '_' AS `false`;
0318
0319 SELECT 'be_r' LIKE '__e__r' ESCAPE '_' AS `false`;
0320 SELECT 'be_r' NOT LIKE '__e__r' ESCAPE '_' AS `true`;
0321
0322 -- [SPARK-28448] Implement ILIKE operator
0323 --
0324 -- test ILIKE (case-insensitive LIKE)
0325 -- Be sure to form every test as an ILIKE/NOT ILIKE pair.
0326 --
0327
0328 -- SELECT 'hawkeye' ILIKE 'h%' AS "true";
0329 -- SELECT 'hawkeye' NOT ILIKE 'h%' AS "false";
0330
0331 -- SELECT 'hawkeye' ILIKE 'H%' AS "true";
0332 -- SELECT 'hawkeye' NOT ILIKE 'H%' AS "false";
0333
0334 -- SELECT 'hawkeye' ILIKE 'H%Eye' AS "true";
0335 -- SELECT 'hawkeye' NOT ILIKE 'H%Eye' AS "false";
0336
0337 -- SELECT 'Hawkeye' ILIKE 'h%' AS "true";
0338 -- SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
0339
0340 --
0341 -- test %/_ combination cases, cf bugs #4821 and #5478
0342 --
0343
0344 SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
0345 SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
0346
0347 SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
0348 SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
0349
0350 SELECT 'jack' LIKE '%____%' AS t;
0351
0352
0353 -- Skip tests of LIKE with indexes
0354 --
0355 -- basic tests of LIKE with indexes
0356 --
0357
0358 -- CREATE TABLE texttest (a text PRIMARY KEY, b int);
0359 -- SELECT * FROM texttest WHERE a LIKE '%1%';
0360
0361 -- CREATE TABLE byteatest (a bytea PRIMARY KEY, b int);
0362 -- SELECT * FROM byteatest WHERE a LIKE '%1%';
0363
0364 -- DROP TABLE texttest, byteatest;
0365
0366
0367 --
0368 -- test implicit type conversion
0369 --
0370
0371 -- E021-07 character concatenation
0372 SELECT 'unknown' || ' and unknown' AS `Concat unknown types`;
0373
0374 SELECT string('text') || ' and unknown' AS `Concat text to unknown type`;
0375
0376 -- Spark SQL does not have char and varchar type
0377 -- SELECT char(20) 'characters' || ' and text' AS "Concat char to unknown type";
0378
0379 -- SELECT text 'text' || char(20) ' and characters' AS "Concat text to char";
0380
0381 -- SELECT text 'text' || varchar ' and varchar' AS "Concat text to varchar";
0382
0383 --
0384 -- test substr with toasted text values
0385 --
0386 CREATE TABLE toasttest(f1 string) USING parquet;
0387
0388 insert into toasttest values(repeat('1234567890',10000));
0389 insert into toasttest values(repeat('1234567890',10000));
0390
0391 --
0392 -- Ensure that some values are uncompressed, to test the faster substring
0393 -- operation used in that case
0394 --
0395 -- alter table toasttest alter column f1 set storage external;
0396 insert into toasttest values(repeat('1234567890',10000));
0397 insert into toasttest values(repeat('1234567890',10000));
0398
0399 -- [SPARK-28451] substr returns different values
0400 -- If the starting position is zero or less, then return from the start of the string
0401 -- adjusting the length to be consistent with the "negative start" per SQL.
0402 -- SELECT substr(f1, -1, 5) from toasttest;
0403
0404 -- If the length is less than zero, an ERROR is thrown.
0405 -- SELECT substr(f1, 5, -1) from toasttest;
0406
0407 -- If no third argument (length) is provided, the length to the end of the
0408 -- string is assumed.
0409 SELECT substr(f1, 99995) from toasttest;
0410
0411 -- If start plus length is > string length, the result is truncated to
0412 -- string length
0413 SELECT substr(f1, 99995, 10) from toasttest;
0414
0415 -- Skip these tests
0416 -- TRUNCATE TABLE toasttest;
0417 -- INSERT INTO toasttest values (repeat('1234567890',300));
0418 -- INSERT INTO toasttest values (repeat('1234567890',300));
0419 -- INSERT INTO toasttest values (repeat('1234567890',300));
0420 -- INSERT INTO toasttest values (repeat('1234567890',300));
0421 -- expect >0 blocks
0422 -- SELECT pg_relation_size(reltoastrelid) = 0 AS is_empty
0423 --  FROM pg_class where relname = 'toasttest';
0424
0425 -- TRUNCATE TABLE toasttest;
0426 -- ALTER TABLE toasttest set (toast_tuple_target = 4080);
0427 -- INSERT INTO toasttest values (repeat('1234567890',300));
0428 -- INSERT INTO toasttest values (repeat('1234567890',300));
0429 -- INSERT INTO toasttest values (repeat('1234567890',300));
0430 -- INSERT INTO toasttest values (repeat('1234567890',300));
0431 -- expect 0 blocks
0432 -- SELECT pg_relation_size(reltoastrelid) = 0 AS is_empty
0433 --  FROM pg_class where relname = 'toasttest';
0434
0435 -- DROP TABLE toasttest;
0436
0437 -- [SPARK-28121] decode can not accept 'escape' as charset
0438 --
0439 --
0440 -- test substr with toasted bytea values
0441 --
0442 -- CREATE TABLE toasttest(f1 binary) USING parquet;
0443
0444 -- insert into toasttest values(decode(repeat('1234567890',10000),'escape'));
0445 -- insert into toasttest values(decode(repeat('1234567890',10000),'escape'));
0446
0447 --
0448 -- Ensure that some values are uncompressed, to test the faster substring
0449 -- operation used in that case
0450 --
0451 -- alter table toasttest alter column f1 set storage external;
0452 -- insert into toasttest values(decode(repeat('1234567890',10000),'escape'));
0453 -- insert into toasttest values(decode(repeat('1234567890',10000),'escape'));
0454
0455 -- If the starting position is zero or less, then return from the start of the string
0456 -- adjusting the length to be consistent with the "negative start" per SQL.
0457 -- SELECT substr(f1, -1, 5) from toasttest;
0458
0459 -- If the length is less than zero, an ERROR is thrown.
0460 -- SELECT substr(f1, 5, -1) from toasttest;
0461
0462 -- If no third argument (length) is provided, the length to the end of the
0463 -- string is assumed.
0464 -- SELECT substr(f1, 99995) from toasttest;
0465
0466 -- If start plus length is > string length, the result is truncated to
0467 -- string length
0468 -- SELECT substr(f1, 99995, 10) from toasttest;
0469
0470 -- DROP TABLE toasttest;
0471
0472 -- Skip these tests because we do not support char type
0473 -- test internally compressing datums
0474
0475 -- this tests compressing a datum to a very small size which exercises a
0476 -- corner case in packed-varlena handling: even though small, the compressed
0477 -- datum must be given a 4-byte header because there are no bits to indicate
0478 -- compression in a 1-byte header
0479
0480 -- CREATE TABLE toasttest (c char(4096));
0481 -- INSERT INTO toasttest VALUES('x');
0482 -- SELECT length(c), c::text FROM toasttest;
0483 -- SELECT c FROM toasttest;
0484 -- DROP TABLE toasttest;
0485
0486 --
0487 -- test length
0488 --
0489
0490 SELECT length('abcdef') AS `length_6`;
0491
0492 -- [SPARK-27930] Replace strpos with locate or position in Spark SQL
0493 --
0494 -- test strpos
0495 --
0496
0497 SELECT position('cd', 'abcdef') AS `pos_3`;
0498
0499 SELECT position('xy', 'abcdef') AS `pos_0`;
0500
0501 --
0502 -- test replace
0503 --
0504 SELECT replace('abcdef', 'de', '45') AS `abc45f`;
0505
0506 SELECT replace('yabadabadoo', 'ba', '123') AS `ya123da123doo`;
0507
0508 SELECT replace('yabadoo', 'bad', '') AS `yaoo`;
0509
0510 -- [SPARK-28087] Add support split_part
0511 --
0512 -- test split_part
0513 --
0514 -- select split_part('joeuser@mydatabase','@',0) AS "an error";
0515
0516 -- select split_part('joeuser@mydatabase','@',1) AS "joeuser";
0517
0518 -- select split_part('joeuser@mydatabase','@',2) AS "mydatabase";
0519
0520 -- select split_part('joeuser@mydatabase','@',3) AS "empty string";
0521
0522 -- select split_part('@joeuser@mydatabase@','@',2) AS "joeuser";
0523
0524 -- [SPARK-27930] Spark SQL use hex
0525 --
0526 -- test to_hex
0527 --
0528 select hex(256*256*256 - 1) AS `ffffff`;
0529
0530 select hex(bigint(bigint(bigint(bigint(256)*256)*256)*256) - 1) AS `ffffffff`;
0531
0532 --
0533 -- MD5 test suite - from IETF RFC 1321
0534 -- (see: ftp://ftp.rfc-editor.org/in-notes/rfc1321.txt)
0535 --
0536 select md5('') = 'd41d8cd98f00b204e9800998ecf8427e' AS `TRUE`;
0537
0538 select md5('a') = '0cc175b9c0f1b6a831c399e269772661' AS `TRUE`;
0539
0540 select md5('abc') = '900150983cd24fb0d6963f7d28e17f72' AS `TRUE`;
0541
0542 select md5('message digest') = 'f96b697d7cb7938d525a2f31aaf161d0' AS `TRUE`;
0543
0544 select md5('abcdefghijklmnopqrstuvwxyz') = 'c3fcd3d76192e4007dfb496cca67e13b' AS `TRUE`;
0545
0546 select md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789') = 'd174ab98d277d9f5a5611c2c9f419d9f' AS `TRUE`;
0547
0548 select md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890') = '57edf4a22be3c955ac49da2e2107b67a' AS `TRUE`;
0549
0550 select md5(binary('')) = 'd41d8cd98f00b204e9800998ecf8427e' AS `TRUE`;
0551
0552 select md5(binary('a')) = '0cc175b9c0f1b6a831c399e269772661' AS `TRUE`;
0553
0554 select md5(binary('abc')) = '900150983cd24fb0d6963f7d28e17f72' AS `TRUE`;
0555
0556 select md5(binary('message digest')) = 'f96b697d7cb7938d525a2f31aaf161d0' AS `TRUE`;
0557
0558 select md5(binary('abcdefghijklmnopqrstuvwxyz')) = 'c3fcd3d76192e4007dfb496cca67e13b' AS `TRUE`;
0559
0560 select md5(binary('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789')) = 'd174ab98d277d9f5a5611c2c9f419d9f' AS `TRUE`;
0561
0562 select md5(binary('12345678901234567890123456789012345678901234567890123456789012345678901234567890')) = '57edf4a22be3c955ac49da2e2107b67a' AS `TRUE`;
0563
0564 -- [SPARK-28122] missing SHA functions
0565 --
0566 -- SHA-2
0567 --
0568 -- SET bytea_output TO hex;
0569
0570 -- SELECT sha224('');
0571 -- SELECT sha224('The quick brown fox jumps over the lazy dog.');
0572
0573 -- SELECT sha256('');
0574 -- SELECT sha256('The quick brown fox jumps over the lazy dog.');
0575
0576 -- SELECT sha384('');
0577 -- SELECT sha384('The quick brown fox jumps over the lazy dog.');
0578
0579 -- SELECT sha512('');
0580 -- SELECT sha512('The quick brown fox jumps over the lazy dog.');
0581
0582 -- [SPARK-28449] Missing escape_string_warning and standard_conforming_strings config
0583 --
0584 -- test behavior of escape_string_warning and standard_conforming_strings options
0585 --
0586 -- set escape_string_warning = off;
0587 -- set standard_conforming_strings = off;
0588
0589 -- show escape_string_warning;
0590 -- show standard_conforming_strings;
0591
0592 -- set escape_string_warning = on;
0593 -- set standard_conforming_strings = on;
0594
0595 -- show escape_string_warning;
0596 -- show standard_conforming_strings;
0597
0598 -- select 'a\bcd' as f1, 'a\b''cd' as f2, 'a\b''''cd' as f3, 'abcd\'   as f4, 'ab\''cd' as f5, '\\' as f6;
0599
0600 -- set standard_conforming_strings = off;
0601
0602 -- select 'a\\bcd' as f1, 'a\\b\'cd' as f2, 'a\\b\'''cd' as f3, 'abcd\\'   as f4, 'ab\\\'cd' as f5, '\\\\' as f6;
0603
0604 -- set escape_string_warning = off;
0605 -- set standard_conforming_strings = on;
0606
0607 -- select 'a\bcd' as f1, 'a\b''cd' as f2, 'a\b''''cd' as f3, 'abcd\'   as f4, 'ab\''cd' as f5, '\\' as f6;
0608
0609 -- set standard_conforming_strings = off;
0610
0611 -- select 'a\\bcd' as f1, 'a\\b\'cd' as f2, 'a\\b\'''cd' as f3, 'abcd\\'   as f4, 'ab\\\'cd' as f5, '\\\\' as f6;
0612
0613
0614 --
0615 -- Additional string functions
0616 --
0617 -- SET bytea_output TO escape;
0618
0619 SELECT initcap('hi THOMAS');
0620
0621 SELECT lpad('hi', 5, 'xy');
0622 SELECT lpad('hi', 5);
0623 SELECT lpad('hi', -5, 'xy');
0624 SELECT lpad('hello', 2);
0625 SELECT lpad('hi', 5, '');
0626
0627 SELECT rpad('hi', 5, 'xy');
0628 SELECT rpad('hi', 5);
0629 SELECT rpad('hi', -5, 'xy');
0630 SELECT rpad('hello', 2);
0631 SELECT rpad('hi', 5, '');
0632
0633 -- skip this test because PostgreSQL has different parameter order compares to SparkSQL
0634 -- SELECT ltrim('zzzytrim', 'xyz');
0635
0636 SELECT translate('', '14', 'ax');
0637 SELECT translate('12345', '14', 'ax');
0638
0639 SELECT ascii('x');
0640 SELECT ascii('');
0641
0642 SELECT chr(65);
0643 -- PostgreSQL throws: ERROR:  null character not permitted
0644 SELECT chr(0);
0645
0646 SELECT repeat('Pg', 4);
0647 SELECT repeat('Pg', -4);
0648
0649 SELECT trim(binary('\\000') from binary('\\000Tom\\000'));
0650 -- [SPARK-28123] Add support btrim
0651 -- SELECT btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea);
0652 -- SELECT btrim(''::bytea, E'\\000'::bytea);
0653 -- SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea);
0654 -- [SPARK-28121] decode can not accept 'escape' as charset
0655 -- [SPARK-28412][SQL] ANSI SQL: OVERLAY function support byte array
0656 -- SELECT encode(overlay(E'Th\\000omas'::bytea placing E'Th\\001omas'::bytea from 2),'escape');
0657 -- SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 8),'escape');
0658 -- SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 for 3),'escape');
0659
0660 DROP TABLE toasttest;