sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
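
# A minimal sketch (editor's addition, not part of the module): these build_* helpers
# receive the already-parsed argument list of a SQL function call and return the AST
# node that models it. For instance, build_upper collapses UPPER(HEX(..)) into a
# single Hex node, as its comment describes:
#
#     hexed = exp.Hex(this=exp.column("x"))
#     assert isinstance(build_upper([hexed]), exp.Hex)              # UPPER(HEX(x)) -> HEX(x)
#     assert isinstance(build_upper([exp.column("x")]), exp.Upper)  # plain UPPER(x)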


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
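
    # Usage sketch (editor's addition): a Parser is normally obtained through a Dialect,
    # which also supplies the matching Tokenizer. Assuming sqlglot is installed:
    #
    #     from sqlglot.dialects import Dialect
    #     dialect = Dialect.get_or_raise("duckdb")
    #     tokens = dialect.tokenize("SELECT 1")
    #     expressions = dialect.parser(error_level=ErrorLevel.RAISE).parse(tokens, "SELECT 1")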

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
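
    # Sketch (editor's addition): NO_PAREN_FUNCTIONS covers keywords that behave like
    # function calls without parentheses, so `SELECT CURRENT_DATE` still yields a node:
    #
    #     import sqlglot
    #     node = sqlglot.parse_one("SELECT CURRENT_DATE").expressions[0]
    #     assert isinstance(node, exp.CurrentDate)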

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }
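
    # Sketch (editor's addition): SUBQUERY_PREDICATES wraps a subquery that follows
    # ANY/ALL/EXISTS/SOME (SOME is normalized to exp.Any), e.g.:
    #
    #     import sqlglot
    #     where = sqlglot.parse_one("SELECT * FROM t WHERE EXISTS (SELECT 1)").args["where"]
    #     assert isinstance(where.this, exp.Exists)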

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
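
    # Sketch (editor's addition): TABLE_ALIAS_TOKENS drops the join-related keywords from
    # ID_VAR_TOKENS so that in `SELECT * FROM t LEFT JOIN u ...` the LEFT starts a join
    # instead of being read as an alias for t:
    #
    #     import sqlglot
    #     join = sqlglot.parse_one("SELECT * FROM t LEFT JOIN u ON t.id = u.id").args["joins"][0]
    #     assert join.side == "LEFT"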

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
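
    # Sketch (editor's addition): LAMBDAS handles arrow syntax such as `x -> x + 1`
    # (e.g. in DuckDB list functions), producing an exp.Lambda whose `this` is the body:
    #
    #     import sqlglot
    #     call = sqlglot.parse_one("SELECT LIST_TRANSFORM(l, x -> x + 1)", read="duckdb")
    #     assert call.find(exp.Lambda) is not None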

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
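
    # Sketch (editor's addition): EXPRESSION_PARSERS backs Parser.parse_into, letting a
    # caller target a specific node type rather than a whole statement:
    #
    #     import sqlglot
    #     cond = sqlglot.parse_one("x > 1 AND y < 2", into=exp.Condition)
    #     assert isinstance(cond, exp.And)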

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
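
    # Sketch (editor's addition): RANGE_PARSERS is keyed by the operator token that follows
    # the left-hand side; BETWEEN and IN get bespoke parsers, while most of the rest reuse
    # binary_range_parser from the top of this module:
    #
    #     import sqlglot
    #     where = sqlglot.parse_one("SELECT * FROM t WHERE x BETWEEN 1 AND 10").args["where"]
    #     assert isinstance(where.this, exp.Between)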

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
"MODIFIES": lambda self: self._parse_modifies_property(), 946 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 947 "NO": lambda self: self._parse_no_property(), 948 "ON": lambda self: self._parse_on_property(), 949 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 950 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 951 "PARTITION": lambda self: self._parse_partitioned_of(), 952 "PARTITION BY": lambda self: self._parse_partitioned_by(), 953 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 954 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 955 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 956 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 957 "READS": lambda self: self._parse_reads_property(), 958 "REMOTE": lambda self: self._parse_remote_with_connection(), 959 "RETURNS": lambda self: self._parse_returns(), 960 "STRICT": lambda self: self.expression(exp.StrictProperty), 961 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 962 "ROW": lambda self: self._parse_row(), 963 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 964 "SAMPLE": lambda self: self.expression( 965 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 966 ), 967 "SECURE": lambda self: self.expression(exp.SecureProperty), 968 "SECURITY": lambda self: self._parse_security(), 969 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 970 "SETTINGS": lambda self: self._parse_settings_property(), 971 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 972 "SORTKEY": lambda self: self._parse_sortkey(), 973 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 974 "STABLE": lambda self: self.expression( 975 exp.StabilityProperty, this=exp.Literal.string("STABLE") 976 ), 977 "STORED": lambda self: self._parse_stored(), 978 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 979 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 980 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 981 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 982 "TO": lambda self: self._parse_to_table(), 983 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 984 "TRANSFORM": lambda self: self.expression( 985 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 986 ), 987 "TTL": lambda self: self._parse_ttl(), 988 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 989 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 990 "VOLATILE": lambda self: self._parse_volatile_property(), 991 "WITH": lambda self: self._parse_with_property(), 992 } 993 994 CONSTRAINT_PARSERS = { 995 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 996 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 997 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 998 "CHARACTER SET": lambda self: self.expression( 999 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1000 ), 1001 "CHECK": lambda self: self.expression( 1002 exp.CheckColumnConstraint, 1003 this=self._parse_wrapped(self._parse_assignment), 1004 enforced=self._match_text_seq("ENFORCED"), 1005 ), 1006 "COLLATE": lambda self: self.expression( 1007 exp.CollateColumnConstraint, 1008 this=self._parse_identifier() or self._parse_column(), 
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }
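
    # Sketch (editor's addition): ALTER_PARSERS dispatches on the keyword that follows the
    # ALTER target, e.g. an ADD COLUMN action round-trips unchanged:
    #
    #     import sqlglot
    #     sql = "ALTER TABLE t ADD COLUMN c INT"
    #     assert sqlglot.parse_one(sql).sql() == sql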
"UNIQUE", 1089 } 1090 1091 NO_PAREN_FUNCTION_PARSERS = { 1092 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1093 "CASE": lambda self: self._parse_case(), 1094 "CONNECT_BY_ROOT": lambda self: self.expression( 1095 exp.ConnectByRoot, this=self._parse_column() 1096 ), 1097 "IF": lambda self: self._parse_if(), 1098 "NEXT": lambda self: self._parse_next_value_for(), 1099 } 1100 1101 INVALID_FUNC_NAME_TOKENS = { 1102 TokenType.IDENTIFIER, 1103 TokenType.STRING, 1104 } 1105 1106 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1107 1108 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1109 1110 FUNCTION_PARSERS = { 1111 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1112 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1113 "DECODE": lambda self: self._parse_decode(), 1114 "EXTRACT": lambda self: self._parse_extract(), 1115 "GAP_FILL": lambda self: self._parse_gap_fill(), 1116 "JSON_OBJECT": lambda self: self._parse_json_object(), 1117 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1118 "JSON_TABLE": lambda self: self._parse_json_table(), 1119 "MATCH": lambda self: self._parse_match_against(), 1120 "NORMALIZE": lambda self: self._parse_normalize(), 1121 "OPENJSON": lambda self: self._parse_open_json(), 1122 "OVERLAY": lambda self: self._parse_overlay(), 1123 "POSITION": lambda self: self._parse_position(), 1124 "PREDICT": lambda self: self._parse_predict(), 1125 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1126 "STRING_AGG": lambda self: self._parse_string_agg(), 1127 "SUBSTRING": lambda self: self._parse_substring(), 1128 "TRIM": lambda self: self._parse_trim(), 1129 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1130 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1131 } 1132 1133 QUERY_MODIFIER_PARSERS = { 1134 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1135 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1136 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1137 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1138 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1139 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1140 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1141 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1142 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1143 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1144 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1145 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1146 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1147 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1148 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1149 TokenType.CLUSTER_BY: lambda self: ( 1150 "cluster", 1151 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1152 ), 1153 TokenType.DISTRIBUTE_BY: lambda self: ( 1154 "distribute", 1155 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1156 ), 1157 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1158 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1159 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1160 } 1161 1162 SET_PARSERS = { 1163 
"GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1164 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1165 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1166 "TRANSACTION": lambda self: self._parse_set_transaction(), 1167 } 1168 1169 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1170 1171 TYPE_LITERAL_PARSERS = { 1172 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1173 } 1174 1175 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1176 1177 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1178 1179 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1180 1181 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1182 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1183 "ISOLATION": ( 1184 ("LEVEL", "REPEATABLE", "READ"), 1185 ("LEVEL", "READ", "COMMITTED"), 1186 ("LEVEL", "READ", "UNCOMITTED"), 1187 ("LEVEL", "SERIALIZABLE"), 1188 ), 1189 "READ": ("WRITE", "ONLY"), 1190 } 1191 1192 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1193 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1194 ) 1195 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1196 1197 CREATE_SEQUENCE: OPTIONS_TYPE = { 1198 "SCALE": ("EXTEND", "NOEXTEND"), 1199 "SHARD": ("EXTEND", "NOEXTEND"), 1200 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1201 **dict.fromkeys( 1202 ( 1203 "SESSION", 1204 "GLOBAL", 1205 "KEEP", 1206 "NOKEEP", 1207 "ORDER", 1208 "NOORDER", 1209 "NOCACHE", 1210 "CYCLE", 1211 "NOCYCLE", 1212 "NOMINVALUE", 1213 "NOMAXVALUE", 1214 "NOSCALE", 1215 "NOSHARD", 1216 ), 1217 tuple(), 1218 ), 1219 } 1220 1221 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1222 1223 USABLES: OPTIONS_TYPE = dict.fromkeys( 1224 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1225 ) 1226 1227 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1228 1229 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1230 "TYPE": ("EVOLUTION",), 1231 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1232 } 1233 1234 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1235 1236 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1237 1238 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1239 "NOT": ("ENFORCED",), 1240 "MATCH": ( 1241 "FULL", 1242 "PARTIAL", 1243 "SIMPLE", 1244 ), 1245 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1246 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1247 } 1248 1249 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1250 1251 CLONE_KEYWORDS = {"CLONE", "COPY"} 1252 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1253 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1254 1255 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1256 1257 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1258 1259 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1260 1261 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1262 1263 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1264 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1265 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1266 1267 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1268 1269 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1270 1271 ADD_CONSTRAINT_TOKENS = { 1272 TokenType.CONSTRAINT, 1273 

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
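
    # Sketch (editor's addition): dialect parsers customize behavior by overriding the
    # class-level flags above, e.g. a hypothetical dialect accepting string aliases
    # (SELECT COUNT(*) 'count') would declare:
    #
    #     class MyDialectParser(Parser):
    #         STRING_ALIASES = True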

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
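
    # Usage sketch (editor's addition): parse() expects tokens produced by the matching
    # dialect's tokenizer; the top-level sqlglot.parse/parse_one helpers wire this up:
    #
    #     import sqlglot
    #     trees = sqlglot.parse("SELECT 1; SELECT 2")
    #     assert len(trees) == 2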
1438 """ 1439 errors = [] 1440 for expression_type in ensure_list(expression_types): 1441 parser = self.EXPRESSION_PARSERS.get(expression_type) 1442 if not parser: 1443 raise TypeError(f"No parser registered for {expression_type}") 1444 1445 try: 1446 return self._parse(parser, raw_tokens, sql) 1447 except ParseError as e: 1448 e.errors[0]["into_expression"] = expression_type 1449 errors.append(e) 1450 1451 raise ParseError( 1452 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1453 errors=merge_errors(errors), 1454 ) from errors[-1] 1455 1456 def _parse( 1457 self, 1458 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1459 raw_tokens: t.List[Token], 1460 sql: t.Optional[str] = None, 1461 ) -> t.List[t.Optional[exp.Expression]]: 1462 self.reset() 1463 self.sql = sql or "" 1464 1465 total = len(raw_tokens) 1466 chunks: t.List[t.List[Token]] = [[]] 1467 1468 for i, token in enumerate(raw_tokens): 1469 if token.token_type == TokenType.SEMICOLON: 1470 if token.comments: 1471 chunks.append([token]) 1472 1473 if i < total - 1: 1474 chunks.append([]) 1475 else: 1476 chunks[-1].append(token) 1477 1478 expressions = [] 1479 1480 for tokens in chunks: 1481 self._index = -1 1482 self._tokens = tokens 1483 self._advance() 1484 1485 expressions.append(parse_method(self)) 1486 1487 if self._index < len(self._tokens): 1488 self.raise_error("Invalid expression / Unexpected token") 1489 1490 self.check_errors() 1491 1492 return expressions 1493 1494 def check_errors(self) -> None: 1495 """Logs or raises any found errors, depending on the chosen error level setting.""" 1496 if self.error_level == ErrorLevel.WARN: 1497 for error in self.errors: 1498 logger.error(str(error)) 1499 elif self.error_level == ErrorLevel.RAISE and self.errors: 1500 raise ParseError( 1501 concat_messages(self.errors, self.max_errors), 1502 errors=merge_errors(self.errors), 1503 ) 1504 1505 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1506 """ 1507 Appends an error in the list of recorded errors or raises it, depending on the chosen 1508 error level setting. 1509 """ 1510 token = token or self._curr or self._prev or Token.string("") 1511 start = token.start 1512 end = token.end + 1 1513 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1514 highlight = self.sql[start:end] 1515 end_context = self.sql[end : end + self.error_message_context] 1516 1517 error = ParseError.new( 1518 f"{message}. Line {token.line}, Col: {token.col}.\n" 1519 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1520 description=message, 1521 line=token.line, 1522 col=token.col, 1523 start_context=start_context, 1524 highlight=highlight, 1525 end_context=end_context, 1526 ) 1527 1528 if self.error_level == ErrorLevel.IMMEDIATE: 1529 raise error 1530 1531 self.errors.append(error) 1532 1533 def expression( 1534 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1535 ) -> E: 1536 """ 1537 Creates a new, validated Expression. 1538 1539 Args: 1540 exp_class: The expression class to instantiate. 1541 comments: An optional list of comments to attach to the expression. 1542 kwargs: The arguments to set for the expression along with their respective values. 1543 1544 Returns: 1545 The target expression. 
1546 """ 1547 instance = exp_class(**kwargs) 1548 instance.add_comments(comments) if comments else self._add_comments(instance) 1549 return self.validate_expression(instance) 1550 1551 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1552 if expression and self._prev_comments: 1553 expression.add_comments(self._prev_comments) 1554 self._prev_comments = None 1555 1556 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1557 """ 1558 Validates an Expression, making sure that all its mandatory arguments are set. 1559 1560 Args: 1561 expression: The expression to validate. 1562 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1563 1564 Returns: 1565 The validated expression. 1566 """ 1567 if self.error_level != ErrorLevel.IGNORE: 1568 for error_message in expression.error_messages(args): 1569 self.raise_error(error_message) 1570 1571 return expression 1572 1573 def _find_sql(self, start: Token, end: Token) -> str: 1574 return self.sql[start.start : end.end + 1] 1575 1576 def _is_connected(self) -> bool: 1577 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1578 1579 def _advance(self, times: int = 1) -> None: 1580 self._index += times 1581 self._curr = seq_get(self._tokens, self._index) 1582 self._next = seq_get(self._tokens, self._index + 1) 1583 1584 if self._index > 0: 1585 self._prev = self._tokens[self._index - 1] 1586 self._prev_comments = self._prev.comments 1587 else: 1588 self._prev = None 1589 self._prev_comments = None 1590 1591 def _retreat(self, index: int) -> None: 1592 if index != self._index: 1593 self._advance(index - self._index) 1594 1595 def _warn_unsupported(self) -> None: 1596 if len(self._tokens) <= 1: 1597 return 1598 1599 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1600 # interested in emitting a warning for the one being currently processed. 1601 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1602 1603 logger.warning( 1604 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1605 ) 1606 1607 def _parse_command(self) -> exp.Command: 1608 self._warn_unsupported() 1609 return self.expression( 1610 exp.Command, 1611 comments=self._prev_comments, 1612 this=self._prev.text.upper(), 1613 expression=self._parse_string(), 1614 ) 1615 1616 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1617 """ 1618 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
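
    # Sketch (editor's addition): when no statement keyword matches, _parse_statement falls
    # through to parsing a bare expression, which is why fragments parse too:
    #
    #     import sqlglot
    #     assert isinstance(sqlglot.parse_one("1 + 1"), exp.Add)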
self._prev 1728 temporary = self._match(TokenType.TEMPORARY) 1729 materialized = self._match_text_seq("MATERIALIZED") 1730 1731 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1732 if not kind: 1733 return self._parse_as_command(start) 1734 1735 concurrently = self._match_text_seq("CONCURRENTLY") 1736 if_exists = exists or self._parse_exists() 1737 table = self._parse_table_parts( 1738 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1739 ) 1740 1741 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1742 1743 if self._match(TokenType.L_PAREN, advance=False): 1744 expressions = self._parse_wrapped_csv(self._parse_types) 1745 else: 1746 expressions = None 1747 1748 return self.expression( 1749 exp.Drop, 1750 comments=start.comments, 1751 exists=if_exists, 1752 this=table, 1753 expressions=expressions, 1754 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1755 temporary=temporary, 1756 materialized=materialized, 1757 cascade=self._match_text_seq("CASCADE"), 1758 constraints=self._match_text_seq("CONSTRAINTS"), 1759 purge=self._match_text_seq("PURGE"), 1760 cluster=cluster, 1761 concurrently=concurrently, 1762 ) 1763 1764 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1765 return ( 1766 self._match_text_seq("IF") 1767 and (not not_ or self._match(TokenType.NOT)) 1768 and self._match(TokenType.EXISTS) 1769 ) 1770 1771 def _parse_create(self) -> exp.Create | exp.Command: 1772 # Note: this can't be None because we've matched a statement parser 1773 start = self._prev 1774 comments = self._prev_comments 1775 1776 replace = ( 1777 start.token_type == TokenType.REPLACE 1778 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1779 or self._match_pair(TokenType.OR, TokenType.ALTER) 1780 ) 1781 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1782 1783 unique = self._match(TokenType.UNIQUE) 1784 1785 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1786 clustered = True 1787 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1788 "COLUMNSTORE" 1789 ): 1790 clustered = False 1791 else: 1792 clustered = None 1793 1794 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1795 self._advance() 1796 1797 properties = None 1798 create_token = self._match_set(self.CREATABLES) and self._prev 1799 1800 if not create_token: 1801 # exp.Properties.Location.POST_CREATE 1802 properties = self._parse_properties() 1803 create_token = self._match_set(self.CREATABLES) and self._prev 1804 1805 if not properties or not create_token: 1806 return self._parse_as_command(start) 1807 1808 concurrently = self._match_text_seq("CONCURRENTLY") 1809 exists = self._parse_exists(not_=True) 1810 this = None 1811 expression: t.Optional[exp.Expression] = None 1812 indexes = None 1813 no_schema_binding = None 1814 begin = None 1815 end = None 1816 clone = None 1817 1818 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1819 nonlocal properties 1820 if properties and temp_props: 1821 properties.expressions.extend(temp_props.expressions) 1822 elif temp_props: 1823 properties = temp_props 1824 1825 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1826 this = self._parse_user_defined_function(kind=create_token.token_type) 1827 1828 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1829 extend_props(self._parse_properties()) 1830 1831 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1832 
extend_props(self._parse_properties()) 1833 1834 if not expression: 1835 if self._match(TokenType.COMMAND): 1836 expression = self._parse_as_command(self._prev) 1837 else: 1838 begin = self._match(TokenType.BEGIN) 1839 return_ = self._match_text_seq("RETURN") 1840 1841 if self._match(TokenType.STRING, advance=False): 1842 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1843 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1844 expression = self._parse_string() 1845 extend_props(self._parse_properties()) 1846 else: 1847 expression = self._parse_statement() 1848 1849 end = self._match_text_seq("END") 1850 1851 if return_: 1852 expression = self.expression(exp.Return, this=expression) 1853 elif create_token.token_type == TokenType.INDEX: 1854 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1855 if not self._match(TokenType.ON): 1856 index = self._parse_id_var() 1857 anonymous = False 1858 else: 1859 index = None 1860 anonymous = True 1861 1862 this = self._parse_index(index=index, anonymous=anonymous) 1863 elif create_token.token_type in self.DB_CREATABLES: 1864 table_parts = self._parse_table_parts( 1865 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1866 ) 1867 1868 # exp.Properties.Location.POST_NAME 1869 self._match(TokenType.COMMA) 1870 extend_props(self._parse_properties(before=True)) 1871 1872 this = self._parse_schema(this=table_parts) 1873 1874 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1875 extend_props(self._parse_properties()) 1876 1877 self._match(TokenType.ALIAS) 1878 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1879 # exp.Properties.Location.POST_ALIAS 1880 extend_props(self._parse_properties()) 1881 1882 if create_token.token_type == TokenType.SEQUENCE: 1883 expression = self._parse_types() 1884 extend_props(self._parse_properties()) 1885 else: 1886 expression = self._parse_ddl_select() 1887 1888 if create_token.token_type == TokenType.TABLE: 1889 # exp.Properties.Location.POST_EXPRESSION 1890 extend_props(self._parse_properties()) 1891 1892 indexes = [] 1893 while True: 1894 index = self._parse_index() 1895 1896 # exp.Properties.Location.POST_INDEX 1897 extend_props(self._parse_properties()) 1898 if not index: 1899 break 1900 else: 1901 self._match(TokenType.COMMA) 1902 indexes.append(index) 1903 elif create_token.token_type == TokenType.VIEW: 1904 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1905 no_schema_binding = True 1906 1907 shallow = self._match_text_seq("SHALLOW") 1908 1909 if self._match_texts(self.CLONE_KEYWORDS): 1910 copy = self._prev.text.lower() == "copy" 1911 clone = self.expression( 1912 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1913 ) 1914 1915 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1916 return self._parse_as_command(start) 1917 1918 create_kind_text = create_token.text.upper() 1919 return self.expression( 1920 exp.Create, 1921 comments=comments, 1922 this=this, 1923 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1924 replace=replace, 1925 refresh=refresh, 1926 unique=unique, 1927 expression=expression, 1928 exists=exists, 1929 properties=properties, 1930 indexes=indexes, 1931 no_schema_binding=no_schema_binding, 1932 begin=begin, 1933 end=end, 1934 clone=clone, 1935 concurrently=concurrently, 1936 clustered=clustered, 1937 ) 1938 1939 def
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1940 seq = exp.SequenceProperties() 1941 1942 options = [] 1943 index = self._index 1944 1945 while self._curr: 1946 self._match(TokenType.COMMA) 1947 if self._match_text_seq("INCREMENT"): 1948 self._match_text_seq("BY") 1949 self._match_text_seq("=") 1950 seq.set("increment", self._parse_term()) 1951 elif self._match_text_seq("MINVALUE"): 1952 seq.set("minvalue", self._parse_term()) 1953 elif self._match_text_seq("MAXVALUE"): 1954 seq.set("maxvalue", self._parse_term()) 1955 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1956 self._match_text_seq("=") 1957 seq.set("start", self._parse_term()) 1958 elif self._match_text_seq("CACHE"): 1959 # T-SQL allows empty CACHE which is initialized dynamically 1960 seq.set("cache", self._parse_number() or True) 1961 elif self._match_text_seq("OWNED", "BY"): 1962 # "OWNED BY NONE" is the default 1963 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1964 else: 1965 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1966 if opt: 1967 options.append(opt) 1968 else: 1969 break 1970 1971 seq.set("options", options if options else None) 1972 return None if self._index == index else seq 1973 1974 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1975 # only used for teradata currently 1976 self._match(TokenType.COMMA) 1977 1978 kwargs = { 1979 "no": self._match_text_seq("NO"), 1980 "dual": self._match_text_seq("DUAL"), 1981 "before": self._match_text_seq("BEFORE"), 1982 "default": self._match_text_seq("DEFAULT"), 1983 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1984 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1985 "after": self._match_text_seq("AFTER"), 1986 "minimum": self._match_texts(("MIN", "MINIMUM")), 1987 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1988 } 1989 1990 if self._match_texts(self.PROPERTY_PARSERS): 1991 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1992 try: 1993 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1994 except TypeError: 1995 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1996 1997 return None 1998 1999 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2000 return self._parse_wrapped_csv(self._parse_property) 2001 2002 def _parse_property(self) -> t.Optional[exp.Expression]: 2003 if self._match_texts(self.PROPERTY_PARSERS): 2004 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2005 2006 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2007 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2008 2009 if self._match_text_seq("COMPOUND", "SORTKEY"): 2010 return self._parse_sortkey(compound=True) 2011 2012 if self._match_text_seq("SQL", "SECURITY"): 2013 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2014 2015 index = self._index 2016 key = self._parse_column() 2017 2018 if not self._match(TokenType.EQ): 2019 self._retreat(index) 2020 return self._parse_sequence_properties() 2021 2022 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2023 if isinstance(key, exp.Column): 2024 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2025 2026 value = self._parse_bitwise() or self._parse_var(any_token=True) 2027 2028 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2029 if 
isinstance(value, exp.Column): 2030 value = exp.var(value.name) 2031 2032 return self.expression(exp.Property, this=key, value=value) 2033 2034 def _parse_stored(self) -> exp.FileFormatProperty: 2035 self._match(TokenType.ALIAS) 2036 2037 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2038 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2039 2040 return self.expression( 2041 exp.FileFormatProperty, 2042 this=( 2043 self.expression( 2044 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2045 ) 2046 if input_format or output_format 2047 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2048 ), 2049 ) 2050 2051 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2052 field = self._parse_field() 2053 if isinstance(field, exp.Identifier) and not field.quoted: 2054 field = exp.var(field) 2055 2056 return field 2057 2058 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2059 self._match(TokenType.EQ) 2060 self._match(TokenType.ALIAS) 2061 2062 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2063 2064 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2065 properties = [] 2066 while True: 2067 if before: 2068 prop = self._parse_property_before() 2069 else: 2070 prop = self._parse_property() 2071 if not prop: 2072 break 2073 for p in ensure_list(prop): 2074 properties.append(p) 2075 2076 if properties: 2077 return self.expression(exp.Properties, expressions=properties) 2078 2079 return None 2080 2081 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2082 return self.expression( 2083 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2084 ) 2085 2086 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2087 if self._match_texts(("DEFINER", "INVOKER")): 2088 security_specifier = self._prev.text.upper() 2089 return self.expression(exp.SecurityProperty, this=security_specifier) 2090 return None 2091 2092 def _parse_settings_property(self) -> exp.SettingsProperty: 2093 return self.expression( 2094 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2095 ) 2096 2097 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2098 if self._index >= 2: 2099 pre_volatile_token = self._tokens[self._index - 2] 2100 else: 2101 pre_volatile_token = None 2102 2103 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2104 return exp.VolatileProperty() 2105 2106 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2107 2108 def _parse_retention_period(self) -> exp.Var: 2109 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2110 number = self._parse_number() 2111 number_str = f"{number} " if number else "" 2112 unit = self._parse_var(any_token=True) 2113 return exp.var(f"{number_str}{unit}") 2114 2115 def _parse_system_versioning_property( 2116 self, with_: bool = False 2117 ) -> exp.WithSystemVersioningProperty: 2118 self._match(TokenType.EQ) 2119 prop = self.expression( 2120 exp.WithSystemVersioningProperty, 2121 **{ # type: ignore 2122 "on": True, 2123 "with": with_, 2124 }, 2125 ) 2126 2127 if self._match_text_seq("OFF"): 2128 prop.set("on", False) 2129 return prop 2130 2131 self._match(TokenType.ON) 2132 if self._match(TokenType.L_PAREN): 2133 while self._curr and not 
self._match(TokenType.R_PAREN): 2134 if self._match_text_seq("HISTORY_TABLE", "="): 2135 prop.set("this", self._parse_table_parts()) 2136 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2137 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2138 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2139 prop.set("retention_period", self._parse_retention_period()) 2140 2141 self._match(TokenType.COMMA) 2142 2143 return prop 2144 2145 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2146 self._match(TokenType.EQ) 2147 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2148 prop = self.expression(exp.DataDeletionProperty, on=on) 2149 2150 if self._match(TokenType.L_PAREN): 2151 while self._curr and not self._match(TokenType.R_PAREN): 2152 if self._match_text_seq("FILTER_COLUMN", "="): 2153 prop.set("filter_column", self._parse_column()) 2154 elif self._match_text_seq("RETENTION_PERIOD", "="): 2155 prop.set("retention_period", self._parse_retention_period()) 2156 2157 self._match(TokenType.COMMA) 2158 2159 return prop 2160 2161 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2162 kind = "HASH" 2163 expressions: t.Optional[t.List[exp.Expression]] = None 2164 if self._match_text_seq("BY", "HASH"): 2165 expressions = self._parse_wrapped_csv(self._parse_id_var) 2166 elif self._match_text_seq("BY", "RANDOM"): 2167 kind = "RANDOM" 2168 2169 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2170 buckets: t.Optional[exp.Expression] = None 2171 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2172 buckets = self._parse_number() 2173 2174 return self.expression( 2175 exp.DistributedByProperty, 2176 expressions=expressions, 2177 kind=kind, 2178 buckets=buckets, 2179 order=self._parse_order(), 2180 ) 2181 2182 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2183 self._match_text_seq("KEY") 2184 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2185 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2186 2187 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2188 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2189 prop = self._parse_system_versioning_property(with_=True) 2190 self._match_r_paren() 2191 return prop 2192 2193 if self._match(TokenType.L_PAREN, advance=False): 2194 return self._parse_wrapped_properties() 2195 2196 if self._match_text_seq("JOURNAL"): 2197 return self._parse_withjournaltable() 2198 2199 if self._match_texts(self.VIEW_ATTRIBUTES): 2200 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2201 2202 if self._match_text_seq("DATA"): 2203 return self._parse_withdata(no=False) 2204 elif self._match_text_seq("NO", "DATA"): 2205 return self._parse_withdata(no=True) 2206 2207 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2208 return self._parse_serde_properties(with_=True) 2209 2210 if self._match(TokenType.SCHEMA): 2211 return self.expression( 2212 exp.WithSchemaBindingProperty, 2213 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2214 ) 2215 2216 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2217 return self.expression( 2218 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2219 ) 2220 2221 if not self._next: 2222 return None 2223 2224 return self._parse_withisolatedloading() 2225 2226 def _parse_procedure_option(self) -> exp.Expression | None: 
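# Parses a single stored-procedure option. The EXECUTE AS clause (in T-SQL, EXECUTE AS { CALLER | SELF | OWNER | 'user_name' }) is handled explicitly below; any other option falls through to _parse_var_from_options over PROCEDURE_OPTIONS.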
2227 if self._match_text_seq("EXECUTE", "AS"): 2228 return self.expression( 2229 exp.ExecuteAsProperty, 2230 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2231 or self._parse_string(), 2232 ) 2233 2234 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2235 2236 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2237 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2238 self._match(TokenType.EQ) 2239 2240 user = self._parse_id_var() 2241 self._match(TokenType.PARAMETER) 2242 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2243 2244 if not user or not host: 2245 return None 2246 2247 return exp.DefinerProperty(this=f"{user}@{host}") 2248 2249 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2250 self._match(TokenType.TABLE) 2251 self._match(TokenType.EQ) 2252 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2253 2254 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2255 return self.expression(exp.LogProperty, no=no) 2256 2257 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2258 return self.expression(exp.JournalProperty, **kwargs) 2259 2260 def _parse_checksum(self) -> exp.ChecksumProperty: 2261 self._match(TokenType.EQ) 2262 2263 on = None 2264 if self._match(TokenType.ON): 2265 on = True 2266 elif self._match_text_seq("OFF"): 2267 on = False 2268 2269 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2270 2271 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2272 return self.expression( 2273 exp.Cluster, 2274 expressions=( 2275 self._parse_wrapped_csv(self._parse_ordered) 2276 if wrapped 2277 else self._parse_csv(self._parse_ordered) 2278 ), 2279 ) 2280 2281 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2282 self._match_text_seq("BY") 2283 2284 self._match_l_paren() 2285 expressions = self._parse_csv(self._parse_column) 2286 self._match_r_paren() 2287 2288 if self._match_text_seq("SORTED", "BY"): 2289 self._match_l_paren() 2290 sorted_by = self._parse_csv(self._parse_ordered) 2291 self._match_r_paren() 2292 else: 2293 sorted_by = None 2294 2295 self._match(TokenType.INTO) 2296 buckets = self._parse_number() 2297 self._match_text_seq("BUCKETS") 2298 2299 return self.expression( 2300 exp.ClusteredByProperty, 2301 expressions=expressions, 2302 sorted_by=sorted_by, 2303 buckets=buckets, 2304 ) 2305 2306 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2307 if not self._match_text_seq("GRANTS"): 2308 self._retreat(self._index - 1) 2309 return None 2310 2311 return self.expression(exp.CopyGrantsProperty) 2312 2313 def _parse_freespace(self) -> exp.FreespaceProperty: 2314 self._match(TokenType.EQ) 2315 return self.expression( 2316 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2317 ) 2318 2319 def _parse_mergeblockratio( 2320 self, no: bool = False, default: bool = False 2321 ) -> exp.MergeBlockRatioProperty: 2322 if self._match(TokenType.EQ): 2323 return self.expression( 2324 exp.MergeBlockRatioProperty, 2325 this=self._parse_number(), 2326 percent=self._match(TokenType.PERCENT), 2327 ) 2328 2329 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2330 2331 def _parse_datablocksize( 2332 self, 2333 default: t.Optional[bool] = None, 2334 minimum: t.Optional[bool] = None, 2335 maximum: t.Optional[bool] = None, 2336 ) -> exp.DataBlocksizeProperty: 2337 
self._match(TokenType.EQ) 2338 size = self._parse_number() 2339 2340 units = None 2341 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2342 units = self._prev.text 2343 2344 return self.expression( 2345 exp.DataBlocksizeProperty, 2346 size=size, 2347 units=units, 2348 default=default, 2349 minimum=minimum, 2350 maximum=maximum, 2351 ) 2352 2353 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2354 self._match(TokenType.EQ) 2355 always = self._match_text_seq("ALWAYS") 2356 manual = self._match_text_seq("MANUAL") 2357 never = self._match_text_seq("NEVER") 2358 default = self._match_text_seq("DEFAULT") 2359 2360 autotemp = None 2361 if self._match_text_seq("AUTOTEMP"): 2362 autotemp = self._parse_schema() 2363 2364 return self.expression( 2365 exp.BlockCompressionProperty, 2366 always=always, 2367 manual=manual, 2368 never=never, 2369 default=default, 2370 autotemp=autotemp, 2371 ) 2372 2373 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2374 index = self._index 2375 no = self._match_text_seq("NO") 2376 concurrent = self._match_text_seq("CONCURRENT") 2377 2378 if not self._match_text_seq("ISOLATED", "LOADING"): 2379 self._retreat(index) 2380 return None 2381 2382 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2383 return self.expression( 2384 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2385 ) 2386 2387 def _parse_locking(self) -> exp.LockingProperty: 2388 if self._match(TokenType.TABLE): 2389 kind = "TABLE" 2390 elif self._match(TokenType.VIEW): 2391 kind = "VIEW" 2392 elif self._match(TokenType.ROW): 2393 kind = "ROW" 2394 elif self._match_text_seq("DATABASE"): 2395 kind = "DATABASE" 2396 else: 2397 kind = None 2398 2399 if kind in ("DATABASE", "TABLE", "VIEW"): 2400 this = self._parse_table_parts() 2401 else: 2402 this = None 2403 2404 if self._match(TokenType.FOR): 2405 for_or_in = "FOR" 2406 elif self._match(TokenType.IN): 2407 for_or_in = "IN" 2408 else: 2409 for_or_in = None 2410 2411 if self._match_text_seq("ACCESS"): 2412 lock_type = "ACCESS" 2413 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2414 lock_type = "EXCLUSIVE" 2415 elif self._match_text_seq("SHARE"): 2416 lock_type = "SHARE" 2417 elif self._match_text_seq("READ"): 2418 lock_type = "READ" 2419 elif self._match_text_seq("WRITE"): 2420 lock_type = "WRITE" 2421 elif self._match_text_seq("CHECKSUM"): 2422 lock_type = "CHECKSUM" 2423 else: 2424 lock_type = None 2425 2426 override = self._match_text_seq("OVERRIDE") 2427 2428 return self.expression( 2429 exp.LockingProperty, 2430 this=this, 2431 kind=kind, 2432 for_or_in=for_or_in, 2433 lock_type=lock_type, 2434 override=override, 2435 ) 2436 2437 def _parse_partition_by(self) -> t.List[exp.Expression]: 2438 if self._match(TokenType.PARTITION_BY): 2439 return self._parse_csv(self._parse_assignment) 2440 return [] 2441 2442 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2443 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2444 if self._match_text_seq("MINVALUE"): 2445 return exp.var("MINVALUE") 2446 if self._match_text_seq("MAXVALUE"): 2447 return exp.var("MAXVALUE") 2448 return self._parse_bitwise() 2449 2450 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2451 expression = None 2452 from_expressions = None 2453 to_expressions = None 2454 2455 if self._match(TokenType.IN): 2456 this = self._parse_wrapped_csv(self._parse_bitwise) 2457 elif self._match(TokenType.FROM): 2458 from_expressions = 
self._parse_wrapped_csv(_parse_partition_bound_expr) 2459 self._match_text_seq("TO") 2460 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2461 elif self._match_text_seq("WITH", "(", "MODULUS"): 2462 this = self._parse_number() 2463 self._match_text_seq(",", "REMAINDER") 2464 expression = self._parse_number() 2465 self._match_r_paren() 2466 else: 2467 self.raise_error("Failed to parse partition bound spec.") 2468 2469 return self.expression( 2470 exp.PartitionBoundSpec, 2471 this=this, 2472 expression=expression, 2473 from_expressions=from_expressions, 2474 to_expressions=to_expressions, 2475 ) 2476 2477 # https://www.postgresql.org/docs/current/sql-createtable.html 2478 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2479 if not self._match_text_seq("OF"): 2480 self._retreat(self._index - 1) 2481 return None 2482 2483 this = self._parse_table(schema=True) 2484 2485 if self._match(TokenType.DEFAULT): 2486 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2487 elif self._match_text_seq("FOR", "VALUES"): 2488 expression = self._parse_partition_bound_spec() 2489 else: 2490 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2491 2492 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2493 2494 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2495 self._match(TokenType.EQ) 2496 return self.expression( 2497 exp.PartitionedByProperty, 2498 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2499 ) 2500 2501 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2502 if self._match_text_seq("AND", "STATISTICS"): 2503 statistics = True 2504 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2505 statistics = False 2506 else: 2507 statistics = None 2508 2509 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2510 2511 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2512 if self._match_text_seq("SQL"): 2513 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2514 return None 2515 2516 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2517 if self._match_text_seq("SQL", "DATA"): 2518 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2519 return None 2520 2521 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2522 if self._match_text_seq("PRIMARY", "INDEX"): 2523 return exp.NoPrimaryIndexProperty() 2524 if self._match_text_seq("SQL"): 2525 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2526 return None 2527 2528 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2529 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2530 return exp.OnCommitProperty() 2531 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2532 return exp.OnCommitProperty(delete=True) 2533 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2534 2535 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2536 if self._match_text_seq("SQL", "DATA"): 2537 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2538 return None 2539 2540 def _parse_distkey(self) -> exp.DistKeyProperty: 2541 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2542 2543 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2544 table = self._parse_table(schema=True) 2545 2546 options = [] 2547 while 
self._match_texts(("INCLUDING", "EXCLUDING")): 2548 this = self._prev.text.upper() 2549 2550 id_var = self._parse_id_var() 2551 if not id_var: 2552 return None 2553 2554 options.append( 2555 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2556 ) 2557 2558 return self.expression(exp.LikeProperty, this=table, expressions=options) 2559 2560 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2561 return self.expression( 2562 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2563 ) 2564 2565 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2566 self._match(TokenType.EQ) 2567 return self.expression( 2568 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2569 ) 2570 2571 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2572 self._match_text_seq("WITH", "CONNECTION") 2573 return self.expression( 2574 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2575 ) 2576 2577 def _parse_returns(self) -> exp.ReturnsProperty: 2578 value: t.Optional[exp.Expression] 2579 null = None 2580 is_table = self._match(TokenType.TABLE) 2581 2582 if is_table: 2583 if self._match(TokenType.LT): 2584 value = self.expression( 2585 exp.Schema, 2586 this="TABLE", 2587 expressions=self._parse_csv(self._parse_struct_types), 2588 ) 2589 if not self._match(TokenType.GT): 2590 self.raise_error("Expecting >") 2591 else: 2592 value = self._parse_schema(exp.var("TABLE")) 2593 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2594 null = True 2595 value = None 2596 else: 2597 value = self._parse_types() 2598 2599 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2600 2601 def _parse_describe(self) -> exp.Describe: 2602 kind = self._match_set(self.CREATABLES) and self._prev.text 2603 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2604 if self._match(TokenType.DOT): 2605 style = None 2606 self._retreat(self._index - 2) 2607 this = self._parse_table(schema=True) 2608 properties = self._parse_properties() 2609 expressions = properties.expressions if properties else None 2610 partition = self._parse_partition() 2611 return self.expression( 2612 exp.Describe, 2613 this=this, 2614 style=style, 2615 kind=kind, 2616 expressions=expressions, 2617 partition=partition, 2618 ) 2619 2620 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2621 kind = self._prev.text.upper() 2622 expressions = [] 2623 2624 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2625 if self._match(TokenType.WHEN): 2626 expression = self._parse_disjunction() 2627 self._match(TokenType.THEN) 2628 else: 2629 expression = None 2630 2631 else_ = self._match(TokenType.ELSE) 2632 2633 if not self._match(TokenType.INTO): 2634 return None 2635 2636 return self.expression( 2637 exp.ConditionalInsert, 2638 this=self.expression( 2639 exp.Insert, 2640 this=self._parse_table(schema=True), 2641 expression=self._parse_derived_table_values(), 2642 ), 2643 expression=expression, 2644 else_=else_, 2645 ) 2646 2647 expression = parse_conditional_insert() 2648 while expression is not None: 2649 expressions.append(expression) 2650 expression = parse_conditional_insert() 2651 2652 return self.expression( 2653 exp.MultitableInserts, 2654 kind=kind, 2655 comments=comments, 2656 expressions=expressions, 2657 source=self._parse_table(), 2658 ) 2659 2660 def _parse_insert(self) 
-> t.Union[exp.Insert, exp.MultitableInserts]: 2661 comments = ensure_list(self._prev_comments) 2662 hint = self._parse_hint() 2663 overwrite = self._match(TokenType.OVERWRITE) 2664 ignore = self._match(TokenType.IGNORE) 2665 local = self._match_text_seq("LOCAL") 2666 alternative = None 2667 is_function = None 2668 2669 if self._match_text_seq("DIRECTORY"): 2670 this: t.Optional[exp.Expression] = self.expression( 2671 exp.Directory, 2672 this=self._parse_var_or_string(), 2673 local=local, 2674 row_format=self._parse_row_format(match_row=True), 2675 ) 2676 else: 2677 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2678 comments += ensure_list(self._prev_comments) 2679 return self._parse_multitable_inserts(comments) 2680 2681 if self._match(TokenType.OR): 2682 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2683 2684 self._match(TokenType.INTO) 2685 comments += ensure_list(self._prev_comments) 2686 self._match(TokenType.TABLE) 2687 is_function = self._match(TokenType.FUNCTION) 2688 2689 this = ( 2690 self._parse_table(schema=True, parse_partition=True) 2691 if not is_function 2692 else self._parse_function() 2693 ) 2694 2695 returning = self._parse_returning() 2696 2697 return self.expression( 2698 exp.Insert, 2699 comments=comments, 2700 hint=hint, 2701 is_function=is_function, 2702 this=this, 2703 stored=self._match_text_seq("STORED") and self._parse_stored(), 2704 by_name=self._match_text_seq("BY", "NAME"), 2705 exists=self._parse_exists(), 2706 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2707 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2708 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2709 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2710 conflict=self._parse_on_conflict(), 2711 returning=returning or self._parse_returning(), 2712 overwrite=overwrite, 2713 alternative=alternative, 2714 ignore=ignore, 2715 source=self._match(TokenType.TABLE) and self._parse_table(), 2716 ) 2717 2718 def _parse_kill(self) -> exp.Kill: 2719 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2720 2721 return self.expression( 2722 exp.Kill, 2723 this=self._parse_primary(), 2724 kind=kind, 2725 ) 2726 2727 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2728 conflict = self._match_text_seq("ON", "CONFLICT") 2729 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2730 2731 if not conflict and not duplicate: 2732 return None 2733 2734 conflict_keys = None 2735 constraint = None 2736 2737 if conflict: 2738 if self._match_text_seq("ON", "CONSTRAINT"): 2739 constraint = self._parse_id_var() 2740 elif self._match(TokenType.L_PAREN): 2741 conflict_keys = self._parse_csv(self._parse_id_var) 2742 self._match_r_paren() 2743 2744 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2745 if self._prev.token_type == TokenType.UPDATE: 2746 self._match(TokenType.SET) 2747 expressions = self._parse_csv(self._parse_equality) 2748 else: 2749 expressions = None 2750 2751 return self.expression( 2752 exp.OnConflict, 2753 duplicate=duplicate, 2754 expressions=expressions, 2755 action=action, 2756 conflict_keys=conflict_keys, 2757 constraint=constraint, 2758 ) 2759 2760 def _parse_returning(self) -> t.Optional[exp.Returning]: 2761 if not self._match(TokenType.RETURNING): 2762 return None 2763 return self.expression( 2764 exp.Returning, 2765 
expressions=self._parse_csv(self._parse_expression), 2766 into=self._match(TokenType.INTO) and self._parse_table_part(), 2767 ) 2768 2769 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2770 if not self._match(TokenType.FORMAT): 2771 return None 2772 return self._parse_row_format() 2773 2774 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2775 index = self._index 2776 with_ = with_ or self._match_text_seq("WITH") 2777 2778 if not self._match(TokenType.SERDE_PROPERTIES): 2779 self._retreat(index) 2780 return None 2781 return self.expression( 2782 exp.SerdeProperties, 2783 **{ # type: ignore 2784 "expressions": self._parse_wrapped_properties(), 2785 "with": with_, 2786 }, 2787 ) 2788 2789 def _parse_row_format( 2790 self, match_row: bool = False 2791 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2792 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2793 return None 2794 2795 if self._match_text_seq("SERDE"): 2796 this = self._parse_string() 2797 2798 serde_properties = self._parse_serde_properties() 2799 2800 return self.expression( 2801 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2802 ) 2803 2804 self._match_text_seq("DELIMITED") 2805 2806 kwargs = {} 2807 2808 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2809 kwargs["fields"] = self._parse_string() 2810 if self._match_text_seq("ESCAPED", "BY"): 2811 kwargs["escaped"] = self._parse_string() 2812 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2813 kwargs["collection_items"] = self._parse_string() 2814 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2815 kwargs["map_keys"] = self._parse_string() 2816 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2817 kwargs["lines"] = self._parse_string() 2818 if self._match_text_seq("NULL", "DEFINED", "AS"): 2819 kwargs["null"] = self._parse_string() 2820 2821 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2822 2823 def _parse_load(self) -> exp.LoadData | exp.Command: 2824 if self._match_text_seq("DATA"): 2825 local = self._match_text_seq("LOCAL") 2826 self._match_text_seq("INPATH") 2827 inpath = self._parse_string() 2828 overwrite = self._match(TokenType.OVERWRITE) 2829 self._match_pair(TokenType.INTO, TokenType.TABLE) 2830 2831 return self.expression( 2832 exp.LoadData, 2833 this=self._parse_table(schema=True), 2834 local=local, 2835 overwrite=overwrite, 2836 inpath=inpath, 2837 partition=self._parse_partition(), 2838 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2839 serde=self._match_text_seq("SERDE") and self._parse_string(), 2840 ) 2841 return self._parse_as_command(self._prev) 2842 2843 def _parse_delete(self) -> exp.Delete: 2844 # This handles MySQL's "Multiple-Table Syntax" 2845 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2846 tables = None 2847 comments = self._prev_comments 2848 if not self._match(TokenType.FROM, advance=False): 2849 tables = self._parse_csv(self._parse_table) or None 2850 2851 returning = self._parse_returning() 2852 2853 return self.expression( 2854 exp.Delete, 2855 comments=comments, 2856 tables=tables, 2857 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2858 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2859 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2860 where=self._parse_where(), 2861 returning=returning or 
self._parse_returning(), 2862 limit=self._parse_limit(), 2863 ) 2864 2865 def _parse_update(self) -> exp.Update: 2866 comments = self._prev_comments 2867 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2868 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2869 returning = self._parse_returning() 2870 return self.expression( 2871 exp.Update, 2872 comments=comments, 2873 **{ # type: ignore 2874 "this": this, 2875 "expressions": expressions, 2876 "from": self._parse_from(joins=True), 2877 "where": self._parse_where(), 2878 "returning": returning or self._parse_returning(), 2879 "order": self._parse_order(), 2880 "limit": self._parse_limit(), 2881 }, 2882 ) 2883 2884 def _parse_uncache(self) -> exp.Uncache: 2885 if not self._match(TokenType.TABLE): 2886 self.raise_error("Expecting TABLE after UNCACHE") 2887 2888 return self.expression( 2889 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2890 ) 2891 2892 def _parse_cache(self) -> exp.Cache: 2893 lazy = self._match_text_seq("LAZY") 2894 self._match(TokenType.TABLE) 2895 table = self._parse_table(schema=True) 2896 2897 options = [] 2898 if self._match_text_seq("OPTIONS"): 2899 self._match_l_paren() 2900 k = self._parse_string() 2901 self._match(TokenType.EQ) 2902 v = self._parse_string() 2903 options = [k, v] 2904 self._match_r_paren() 2905 2906 self._match(TokenType.ALIAS) 2907 return self.expression( 2908 exp.Cache, 2909 this=table, 2910 lazy=lazy, 2911 options=options, 2912 expression=self._parse_select(nested=True), 2913 ) 2914 2915 def _parse_partition(self) -> t.Optional[exp.Partition]: 2916 if not self._match(TokenType.PARTITION): 2917 return None 2918 2919 return self.expression( 2920 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2921 ) 2922 2923 def _parse_value(self) -> t.Optional[exp.Tuple]: 2924 if self._match(TokenType.L_PAREN): 2925 expressions = self._parse_csv(self._parse_expression) 2926 self._match_r_paren() 2927 return self.expression(exp.Tuple, expressions=expressions) 2928 2929 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
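# In that case, each bare expression below is wrapped in its own single-element Tuple, so the parenthesized and bare forms both yield uniform row tuples.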
2930 expression = self._parse_expression() 2931 if expression: 2932 return self.expression(exp.Tuple, expressions=[expression]) 2933 return None 2934 2935 def _parse_projections(self) -> t.List[exp.Expression]: 2936 return self._parse_expressions() 2937 2938 def _parse_select( 2939 self, 2940 nested: bool = False, 2941 table: bool = False, 2942 parse_subquery_alias: bool = True, 2943 parse_set_operation: bool = True, 2944 ) -> t.Optional[exp.Expression]: 2945 cte = self._parse_with() 2946 2947 if cte: 2948 this = self._parse_statement() 2949 2950 if not this: 2951 self.raise_error("Failed to parse any statement following CTE") 2952 return cte 2953 2954 if "with" in this.arg_types: 2955 this.set("with", cte) 2956 else: 2957 self.raise_error(f"{this.key} does not support CTE") 2958 this = cte 2959 2960 return this 2961 2962 # duckdb supports a leading FROM, e.g. FROM x 2963 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2964 2965 if self._match(TokenType.SELECT): 2966 comments = self._prev_comments 2967 2968 hint = self._parse_hint() 2969 2970 if self._next and not self._next.token_type == TokenType.DOT: 2971 all_ = self._match(TokenType.ALL) 2972 distinct = self._match_set(self.DISTINCT_TOKENS) 2973 else: 2974 all_, distinct = None, None 2975 2976 kind = ( 2977 self._match(TokenType.ALIAS) 2978 and self._match_texts(("STRUCT", "VALUE")) 2979 and self._prev.text.upper() 2980 ) 2981 2982 if distinct: 2983 distinct = self.expression( 2984 exp.Distinct, 2985 on=self._parse_value() if self._match(TokenType.ON) else None, 2986 ) 2987 2988 if all_ and distinct: 2989 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2990 2991 operation_modifiers = [] 2992 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 2993 operation_modifiers.append(exp.var(self._prev.text.upper())) 2994 2995 limit = self._parse_limit(top=True) 2996 projections = self._parse_projections() 2997 2998 this = self.expression( 2999 exp.Select, 3000 kind=kind, 3001 hint=hint, 3002 distinct=distinct, 3003 expressions=projections, 3004 limit=limit, 3005 operation_modifiers=operation_modifiers or None, 3006 ) 3007 this.comments = comments 3008 3009 into = self._parse_into() 3010 if into: 3011 this.set("into", into) 3012 3013 if not from_: 3014 from_ = self._parse_from() 3015 3016 if from_: 3017 this.set("from", from_) 3018 3019 this = self._parse_query_modifiers(this) 3020 elif (table or nested) and self._match(TokenType.L_PAREN): 3021 if self._match(TokenType.PIVOT): 3022 this = self._parse_simplified_pivot() 3023 elif self._match(TokenType.FROM): 3024 this = exp.select("*").from_( 3025 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3026 ) 3027 else: 3028 this = ( 3029 self._parse_table() 3030 if table 3031 else self._parse_select(nested=True, parse_set_operation=False) 3032 ) 3033 3034 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers 3035 in case a modifier (e.g.
join) is following 3036 if table and isinstance(this, exp.Values) and this.alias: 3037 alias = this.args["alias"].pop() 3038 this = exp.Table(this=this, alias=alias) 3039 3040 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3041 3042 self._match_r_paren() 3043 3044 # We return early here so that the UNION isn't attached to the subquery by the 3045 # following call to _parse_set_operations, but instead becomes the parent node 3046 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3047 elif self._match(TokenType.VALUES, advance=False): 3048 this = self._parse_derived_table_values() 3049 elif from_: 3050 this = exp.select("*").from_(from_.this, copy=False) 3051 elif self._match(TokenType.SUMMARIZE): 3052 table = self._match(TokenType.TABLE) 3053 this = self._parse_select() or self._parse_string() or self._parse_table() 3054 return self.expression(exp.Summarize, this=this, table=table) 3055 elif self._match(TokenType.DESCRIBE): 3056 this = self._parse_describe() 3057 elif self._match_text_seq("STREAM"): 3058 this = self.expression(exp.Stream, this=self._parse_function()) 3059 else: 3060 this = None 3061 3062 return self._parse_set_operations(this) if parse_set_operation else this 3063 3064 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3065 if not skip_with_token and not self._match(TokenType.WITH): 3066 return None 3067 3068 comments = self._prev_comments 3069 recursive = self._match(TokenType.RECURSIVE) 3070 3071 last_comments = None 3072 expressions = [] 3073 while True: 3074 expressions.append(self._parse_cte()) 3075 if last_comments: 3076 expressions[-1].add_comments(last_comments) 3077 3078 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3079 break 3080 else: 3081 self._match(TokenType.WITH) 3082 3083 last_comments = self._prev_comments 3084 3085 return self.expression( 3086 exp.With, comments=comments, expressions=expressions, recursive=recursive 3087 ) 3088 3089 def _parse_cte(self) -> exp.CTE: 3090 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3091 if not alias or not alias.this: 3092 self.raise_error("Expected CTE to have alias") 3093 3094 self._match(TokenType.ALIAS) 3095 comments = self._prev_comments 3096 3097 if self._match_text_seq("NOT", "MATERIALIZED"): 3098 materialized = False 3099 elif self._match_text_seq("MATERIALIZED"): 3100 materialized = True 3101 else: 3102 materialized = None 3103 3104 return self.expression( 3105 exp.CTE, 3106 this=self._parse_wrapped(self._parse_statement), 3107 alias=alias, 3108 materialized=materialized, 3109 comments=comments, 3110 ) 3111 3112 def _parse_table_alias( 3113 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3114 ) -> t.Optional[exp.TableAlias]: 3115 any_token = self._match(TokenType.ALIAS) 3116 alias = ( 3117 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3118 or self._parse_string_as_identifier() 3119 ) 3120 3121 index = self._index 3122 if self._match(TokenType.L_PAREN): 3123 columns = self._parse_csv(self._parse_function_parameter) 3124 self._match_r_paren() if columns else self._retreat(index) 3125 else: 3126 columns = None 3127 3128 if not alias and not columns: 3129 return None 3130 3131 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3132 3133 # We bubble up comments from the Identifier to the TableAlias 3134 if isinstance(alias, exp.Identifier): 3135 table_alias.add_comments(alias.pop_comments()) 3136 3137 return table_alias 3138 3139 def 
_parse_subquery( 3140 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3141 ) -> t.Optional[exp.Subquery]: 3142 if not this: 3143 return None 3144 3145 return self.expression( 3146 exp.Subquery, 3147 this=this, 3148 pivots=self._parse_pivots(), 3149 alias=self._parse_table_alias() if parse_alias else None, 3150 sample=self._parse_table_sample(), 3151 ) 3152 3153 def _implicit_unnests_to_explicit(self, this: E) -> E: 3154 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3155 3156 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3157 for i, join in enumerate(this.args.get("joins") or []): 3158 table = join.this 3159 normalized_table = table.copy() 3160 normalized_table.meta["maybe_column"] = True 3161 normalized_table = _norm(normalized_table, dialect=self.dialect) 3162 3163 if isinstance(table, exp.Table) and not join.args.get("on"): 3164 if normalized_table.parts[0].name in refs: 3165 table_as_column = table.to_column() 3166 unnest = exp.Unnest(expressions=[table_as_column]) 3167 3168 # Table.to_column creates a parent Alias node that we want to convert to 3169 # a TableAlias and attach to the Unnest, so it matches the parser's output 3170 if isinstance(table.args.get("alias"), exp.TableAlias): 3171 table_as_column.replace(table_as_column.this) 3172 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3173 3174 table.replace(unnest) 3175 3176 refs.add(normalized_table.alias_or_name) 3177 3178 return this 3179 3180 def _parse_query_modifiers( 3181 self, this: t.Optional[exp.Expression] 3182 ) -> t.Optional[exp.Expression]: 3183 if isinstance(this, (exp.Query, exp.Table)): 3184 for join in self._parse_joins(): 3185 this.append("joins", join) 3186 for lateral in iter(self._parse_lateral, None): 3187 this.append("laterals", lateral) 3188 3189 while True: 3190 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3191 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3192 key, expression = parser(self) 3193 3194 if expression: 3195 this.set(key, expression) 3196 if key == "limit": 3197 offset = expression.args.pop("offset", None) 3198 3199 if offset: 3200 offset = exp.Offset(expression=offset) 3201 this.set("offset", offset) 3202 3203 limit_by_expressions = expression.expressions 3204 expression.set("expressions", None) 3205 offset.set("expressions", limit_by_expressions) 3206 continue 3207 break 3208 3209 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3210 this = self._implicit_unnests_to_explicit(this) 3211 3212 return this 3213 3214 def _parse_hint(self) -> t.Optional[exp.Hint]: 3215 if self._match(TokenType.HINT): 3216 hints = [] 3217 for hint in iter( 3218 lambda: self._parse_csv( 3219 lambda: self._parse_function() or self._parse_var(upper=True) 3220 ), 3221 [], 3222 ): 3223 hints.extend(hint) 3224 3225 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3226 self.raise_error("Expected */ after HINT") 3227 3228 return self.expression(exp.Hint, expressions=hints) 3229 3230 return None 3231 3232 def _parse_into(self) -> t.Optional[exp.Into]: 3233 if not self._match(TokenType.INTO): 3234 return None 3235 3236 temp = self._match(TokenType.TEMPORARY) 3237 unlogged = self._match_text_seq("UNLOGGED") 3238 self._match(TokenType.TABLE) 3239 3240 return self.expression( 3241 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3242 ) 3243 3244 def _parse_from( 3245 self, joins: bool = False, skip_from_token: bool = False 
3246 ) -> t.Optional[exp.From]: 3247 if not skip_from_token and not self._match(TokenType.FROM): 3248 return None 3249 3250 return self.expression( 3251 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3252 ) 3253 3254 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3255 return self.expression( 3256 exp.MatchRecognizeMeasure, 3257 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3258 this=self._parse_expression(), 3259 ) 3260 3261 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3262 if not self._match(TokenType.MATCH_RECOGNIZE): 3263 return None 3264 3265 self._match_l_paren() 3266 3267 partition = self._parse_partition_by() 3268 order = self._parse_order() 3269 3270 measures = ( 3271 self._parse_csv(self._parse_match_recognize_measure) 3272 if self._match_text_seq("MEASURES") 3273 else None 3274 ) 3275 3276 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3277 rows = exp.var("ONE ROW PER MATCH") 3278 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3279 text = "ALL ROWS PER MATCH" 3280 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3281 text += " SHOW EMPTY MATCHES" 3282 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3283 text += " OMIT EMPTY MATCHES" 3284 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3285 text += " WITH UNMATCHED ROWS" 3286 rows = exp.var(text) 3287 else: 3288 rows = None 3289 3290 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3291 text = "AFTER MATCH SKIP" 3292 if self._match_text_seq("PAST", "LAST", "ROW"): 3293 text += " PAST LAST ROW" 3294 elif self._match_text_seq("TO", "NEXT", "ROW"): 3295 text += " TO NEXT ROW" 3296 elif self._match_text_seq("TO", "FIRST"): 3297 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3298 elif self._match_text_seq("TO", "LAST"): 3299 text += f" TO LAST {self._advance_any().text}" # type: ignore 3300 after = exp.var(text) 3301 else: 3302 after = None 3303 3304 if self._match_text_seq("PATTERN"): 3305 self._match_l_paren() 3306 3307 if not self._curr: 3308 self.raise_error("Expecting )", self._curr) 3309 3310 paren = 1 3311 start = self._curr 3312 3313 while self._curr and paren > 0: 3314 if self._curr.token_type == TokenType.L_PAREN: 3315 paren += 1 3316 if self._curr.token_type == TokenType.R_PAREN: 3317 paren -= 1 3318 3319 end = self._prev 3320 self._advance() 3321 3322 if paren > 0: 3323 self.raise_error("Expecting )", self._curr) 3324 3325 pattern = exp.var(self._find_sql(start, end)) 3326 else: 3327 pattern = None 3328 3329 define = ( 3330 self._parse_csv(self._parse_name_as_expression) 3331 if self._match_text_seq("DEFINE") 3332 else None 3333 ) 3334 3335 self._match_r_paren() 3336 3337 return self.expression( 3338 exp.MatchRecognize, 3339 partition_by=partition, 3340 order=order, 3341 measures=measures, 3342 rows=rows, 3343 after=after, 3344 pattern=pattern, 3345 define=define, 3346 alias=self._parse_table_alias(), 3347 ) 3348 3349 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3350 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3351 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3352 cross_apply = False 3353 3354 if cross_apply is not None: 3355 this = self._parse_select(table=True) 3356 view = None 3357 outer = None 3358 elif self._match(TokenType.LATERAL): 3359 this = self._parse_select(table=True) 3360 view = self._match(TokenType.VIEW) 3361 outer = self._match(TokenType.OUTER) 3362 else: 3363 return None 3364 3365 if 
not this: 3366 this = ( 3367 self._parse_unnest() 3368 or self._parse_function() 3369 or self._parse_id_var(any_token=False) 3370 ) 3371 3372 while self._match(TokenType.DOT): 3373 this = exp.Dot( 3374 this=this, 3375 expression=self._parse_function() or self._parse_id_var(any_token=False), 3376 ) 3377 3378 if view: 3379 table = self._parse_id_var(any_token=False) 3380 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3381 table_alias: t.Optional[exp.TableAlias] = self.expression( 3382 exp.TableAlias, this=table, columns=columns 3383 ) 3384 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3385 # We move the alias from the lateral's child node to the lateral itself 3386 table_alias = this.args["alias"].pop() 3387 else: 3388 table_alias = self._parse_table_alias() 3389 3390 return self.expression( 3391 exp.Lateral, 3392 this=this, 3393 view=view, 3394 outer=outer, 3395 alias=table_alias, 3396 cross_apply=cross_apply, 3397 ) 3398 3399 def _parse_join_parts( 3400 self, 3401 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3402 return ( 3403 self._match_set(self.JOIN_METHODS) and self._prev, 3404 self._match_set(self.JOIN_SIDES) and self._prev, 3405 self._match_set(self.JOIN_KINDS) and self._prev, 3406 ) 3407 3408 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3409 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3410 this = self._parse_column() 3411 if isinstance(this, exp.Column): 3412 return this.this 3413 return this 3414 3415 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3416 3417 def _parse_join( 3418 self, skip_join_token: bool = False, parse_bracket: bool = False 3419 ) -> t.Optional[exp.Join]: 3420 if self._match(TokenType.COMMA): 3421 return self.expression(exp.Join, this=self._parse_table()) 3422 3423 index = self._index 3424 method, side, kind = self._parse_join_parts() 3425 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3426 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3427 3428 if not skip_join_token and not join: 3429 self._retreat(index) 3430 kind = None 3431 method = None 3432 side = None 3433 3434 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3435 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3436 3437 if not skip_join_token and not join and not outer_apply and not cross_apply: 3438 return None 3439 3440 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3441 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3442 kwargs["expressions"] = self._parse_csv( 3443 lambda: self._parse_table(parse_bracket=parse_bracket) 3444 ) 3445 3446 if method: 3447 kwargs["method"] = method.text 3448 if side: 3449 kwargs["side"] = side.text 3450 if kind: 3451 kwargs["kind"] = kind.text 3452 if hint: 3453 kwargs["hint"] = hint 3454 3455 if self._match(TokenType.MATCH_CONDITION): 3456 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3457 3458 if self._match(TokenType.ON): 3459 kwargs["on"] = self._parse_assignment() 3460 elif self._match(TokenType.USING): 3461 kwargs["using"] = self._parse_using_identifiers() 3462 elif ( 3463 not (outer_apply or cross_apply) 3464 and not isinstance(kwargs["this"], exp.Unnest) 3465 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3466 ): 3467 index = self._index 3468 joins: t.Optional[list] = 
list(self._parse_joins()) 3469 3470 if joins and self._match(TokenType.ON): 3471 kwargs["on"] = self._parse_assignment() 3472 elif joins and self._match(TokenType.USING): 3473 kwargs["using"] = self._parse_using_identifiers() 3474 else: 3475 joins = None 3476 self._retreat(index) 3477 3478 kwargs["this"].set("joins", joins if joins else None) 3479 3480 comments = [c for token in (method, side, kind) if token for c in token.comments] 3481 return self.expression(exp.Join, comments=comments, **kwargs) 3482 3483 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3484 this = self._parse_assignment() 3485 3486 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3487 return this 3488 3489 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3490 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3491 3492 return this 3493 3494 def _parse_index_params(self) -> exp.IndexParameters: 3495 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3496 3497 if self._match(TokenType.L_PAREN, advance=False): 3498 columns = self._parse_wrapped_csv(self._parse_with_operator) 3499 else: 3500 columns = None 3501 3502 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3503 partition_by = self._parse_partition_by() 3504 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3505 tablespace = ( 3506 self._parse_var(any_token=True) 3507 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3508 else None 3509 ) 3510 where = self._parse_where() 3511 3512 on = self._parse_field() if self._match(TokenType.ON) else None 3513 3514 return self.expression( 3515 exp.IndexParameters, 3516 using=using, 3517 columns=columns, 3518 include=include, 3519 partition_by=partition_by, 3520 where=where, 3521 with_storage=with_storage, 3522 tablespace=tablespace, 3523 on=on, 3524 ) 3525 3526 def _parse_index( 3527 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3528 ) -> t.Optional[exp.Index]: 3529 if index or anonymous: 3530 unique = None 3531 primary = None 3532 amp = None 3533 3534 self._match(TokenType.ON) 3535 self._match(TokenType.TABLE) # hive 3536 table = self._parse_table_parts(schema=True) 3537 else: 3538 unique = self._match(TokenType.UNIQUE) 3539 primary = self._match_text_seq("PRIMARY") 3540 amp = self._match_text_seq("AMP") 3541 3542 if not self._match(TokenType.INDEX): 3543 return None 3544 3545 index = self._parse_id_var() 3546 table = None 3547 3548 params = self._parse_index_params() 3549 3550 return self.expression( 3551 exp.Index, 3552 this=index, 3553 table=table, 3554 unique=unique, 3555 primary=primary, 3556 amp=amp, 3557 params=params, 3558 ) 3559 3560 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3561 hints: t.List[exp.Expression] = [] 3562 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3563 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3564 hints.append( 3565 self.expression( 3566 exp.WithTableHint, 3567 expressions=self._parse_csv( 3568 lambda: self._parse_function() or self._parse_var(any_token=True) 3569 ), 3570 ) 3571 ) 3572 self._match_r_paren() 3573 else: 3574 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3575 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3576 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3577 3578 self._match_set((TokenType.INDEX, TokenType.KEY)) 3579 if self._match(TokenType.FOR): 3580 
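# Added note (illustrative, not in the original source): per the MySQL index-hint grammar
# linked above, the optional FOR clause scopes a hint, e.g. USE INDEX FOR JOIN (idx);
# the token following FOR is captured as the hint's "target" below.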
hint.set("target", self._advance_any() and self._prev.text.upper()) 3581 3582 hint.set("expressions", self._parse_wrapped_id_vars()) 3583 hints.append(hint) 3584 3585 return hints or None 3586 3587 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3588 return ( 3589 (not schema and self._parse_function(optional_parens=False)) 3590 or self._parse_id_var(any_token=False) 3591 or self._parse_string_as_identifier() 3592 or self._parse_placeholder() 3593 ) 3594 3595 def _parse_table_parts( 3596 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3597 ) -> exp.Table: 3598 catalog = None 3599 db = None 3600 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3601 3602 while self._match(TokenType.DOT): 3603 if catalog: 3604 # This allows nesting the table in arbitrarily many dot expressions if needed 3605 table = self.expression( 3606 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3607 ) 3608 else: 3609 catalog = db 3610 db = table 3611 # "" used for tsql FROM a..b case 3612 table = self._parse_table_part(schema=schema) or "" 3613 3614 if ( 3615 wildcard 3616 and self._is_connected() 3617 and (isinstance(table, exp.Identifier) or not table) 3618 and self._match(TokenType.STAR) 3619 ): 3620 if isinstance(table, exp.Identifier): 3621 table.args["this"] += "*" 3622 else: 3623 table = exp.Identifier(this="*") 3624 3625 # We bubble up comments from the Identifier to the Table 3626 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3627 3628 if is_db_reference: 3629 catalog = db 3630 db = table 3631 table = None 3632 3633 if not table and not is_db_reference: 3634 self.raise_error(f"Expected table name but got {self._curr}") 3635 if not db and is_db_reference: 3636 self.raise_error(f"Expected database name but got {self._curr}") 3637 3638 table = self.expression( 3639 exp.Table, 3640 comments=comments, 3641 this=table, 3642 db=db, 3643 catalog=catalog, 3644 ) 3645 3646 changes = self._parse_changes() 3647 if changes: 3648 table.set("changes", changes) 3649 3650 at_before = self._parse_historical_data() 3651 if at_before: 3652 table.set("when", at_before) 3653 3654 pivots = self._parse_pivots() 3655 if pivots: 3656 table.set("pivots", pivots) 3657 3658 return table 3659 3660 def _parse_table( 3661 self, 3662 schema: bool = False, 3663 joins: bool = False, 3664 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3665 parse_bracket: bool = False, 3666 is_db_reference: bool = False, 3667 parse_partition: bool = False, 3668 ) -> t.Optional[exp.Expression]: 3669 lateral = self._parse_lateral() 3670 if lateral: 3671 return lateral 3672 3673 unnest = self._parse_unnest() 3674 if unnest: 3675 return unnest 3676 3677 values = self._parse_derived_table_values() 3678 if values: 3679 return values 3680 3681 subquery = self._parse_select(table=True) 3682 if subquery: 3683 if not subquery.args.get("pivots"): 3684 subquery.set("pivots", self._parse_pivots()) 3685 return subquery 3686 3687 bracket = parse_bracket and self._parse_bracket(None) 3688 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3689 3690 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3691 self._parse_table 3692 ) 3693 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3694 3695 only = self._match(TokenType.ONLY) 3696 3697 this = t.cast( 3698 exp.Expression, 3699 bracket 3700 or rows_from 3701 or self._parse_bracket( 3702 
self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3703 ), 3704 ) 3705 3706 if only: 3707 this.set("only", only) 3708 3709 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3710 self._match_text_seq("*") 3711 3712 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3713 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3714 this.set("partition", self._parse_partition()) 3715 3716 if schema: 3717 return self._parse_schema(this=this) 3718 3719 version = self._parse_version() 3720 3721 if version: 3722 this.set("version", version) 3723 3724 if self.dialect.ALIAS_POST_TABLESAMPLE: 3725 this.set("sample", self._parse_table_sample()) 3726 3727 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3728 if alias: 3729 this.set("alias", alias) 3730 3731 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3732 return self.expression( 3733 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3734 ) 3735 3736 this.set("hints", self._parse_table_hints()) 3737 3738 if not this.args.get("pivots"): 3739 this.set("pivots", self._parse_pivots()) 3740 3741 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3742 this.set("sample", self._parse_table_sample()) 3743 3744 if joins: 3745 for join in self._parse_joins(): 3746 this.append("joins", join) 3747 3748 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3749 this.set("ordinality", True) 3750 this.set("alias", self._parse_table_alias()) 3751 3752 return this 3753 3754 def _parse_version(self) -> t.Optional[exp.Version]: 3755 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3756 this = "TIMESTAMP" 3757 elif self._match(TokenType.VERSION_SNAPSHOT): 3758 this = "VERSION" 3759 else: 3760 return None 3761 3762 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3763 kind = self._prev.text.upper() 3764 start = self._parse_bitwise() 3765 self._match_texts(("TO", "AND")) 3766 end = self._parse_bitwise() 3767 expression: t.Optional[exp.Expression] = self.expression( 3768 exp.Tuple, expressions=[start, end] 3769 ) 3770 elif self._match_text_seq("CONTAINED", "IN"): 3771 kind = "CONTAINED IN" 3772 expression = self.expression( 3773 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3774 ) 3775 elif self._match(TokenType.ALL): 3776 kind = "ALL" 3777 expression = None 3778 else: 3779 self._match_text_seq("AS", "OF") 3780 kind = "AS OF" 3781 expression = self._parse_type() 3782 3783 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3784 3785 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3786 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3787 index = self._index 3788 historical_data = None 3789 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3790 this = self._prev.text.upper() 3791 kind = ( 3792 self._match(TokenType.L_PAREN) 3793 and self._match_texts(self.HISTORICAL_DATA_KIND) 3794 and self._prev.text.upper() 3795 ) 3796 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3797 3798 if expression: 3799 self._match_r_paren() 3800 historical_data = self.expression( 3801 exp.HistoricalData, this=this, kind=kind, expression=expression 3802 ) 3803 else: 3804 self._retreat(index) 3805 3806 return historical_data 3807 3808 def _parse_changes(self) -> t.Optional[exp.Changes]: 3809 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3810 return None 3811 3812 information = 
self._parse_var(any_token=True) 3813 self._match_r_paren() 3814 3815 return self.expression( 3816 exp.Changes, 3817 information=information, 3818 at_before=self._parse_historical_data(), 3819 end=self._parse_historical_data(), 3820 ) 3821 3822 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3823 if not self._match(TokenType.UNNEST): 3824 return None 3825 3826 expressions = self._parse_wrapped_csv(self._parse_equality) 3827 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3828 3829 alias = self._parse_table_alias() if with_alias else None 3830 3831 if alias: 3832 if self.dialect.UNNEST_COLUMN_ONLY: 3833 if alias.args.get("columns"): 3834 self.raise_error("Unexpected extra column alias in unnest.") 3835 3836 alias.set("columns", [alias.this]) 3837 alias.set("this", None) 3838 3839 columns = alias.args.get("columns") or [] 3840 if offset and len(expressions) < len(columns): 3841 offset = columns.pop() 3842 3843 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3844 self._match(TokenType.ALIAS) 3845 offset = self._parse_id_var( 3846 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3847 ) or exp.to_identifier("offset") 3848 3849 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3850 3851 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3852 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3853 if not is_derived and not ( 3854 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3855 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3856 ): 3857 return None 3858 3859 expressions = self._parse_csv(self._parse_value) 3860 alias = self._parse_table_alias() 3861 3862 if is_derived: 3863 self._match_r_paren() 3864 3865 return self.expression( 3866 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3867 ) 3868 3869 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3870 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3871 as_modifier and self._match_text_seq("USING", "SAMPLE") 3872 ): 3873 return None 3874 3875 bucket_numerator = None 3876 bucket_denominator = None 3877 bucket_field = None 3878 percent = None 3879 size = None 3880 seed = None 3881 3882 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3883 matched_l_paren = self._match(TokenType.L_PAREN) 3884 3885 if self.TABLESAMPLE_CSV: 3886 num = None 3887 expressions = self._parse_csv(self._parse_primary) 3888 else: 3889 expressions = None 3890 num = ( 3891 self._parse_factor() 3892 if self._match(TokenType.NUMBER, advance=False) 3893 else self._parse_primary() or self._parse_placeholder() 3894 ) 3895 3896 if self._match_text_seq("BUCKET"): 3897 bucket_numerator = self._parse_number() 3898 self._match_text_seq("OUT", "OF") 3899 bucket_denominator = self._parse_number() 3900 self._match(TokenType.ON) 3901 bucket_field = self._parse_field() 3902 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3903 percent = num 3904 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3905 size = num 3906 else: 3907 percent = num 3908 3909 if matched_l_paren: 3910 self._match_r_paren() 3911 3912 if self._match(TokenType.L_PAREN): 3913 method = self._parse_var(upper=True) 3914 seed = self._match(TokenType.COMMA) and self._parse_number() 3915 self._match_r_paren() 3916 elif self._match_texts(("SEED", "REPEATABLE")): 3917 seed =
self._parse_wrapped(self._parse_number) 3918 3919 if not method and self.DEFAULT_SAMPLING_METHOD: 3920 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3921 3922 return self.expression( 3923 exp.TableSample, 3924 expressions=expressions, 3925 method=method, 3926 bucket_numerator=bucket_numerator, 3927 bucket_denominator=bucket_denominator, 3928 bucket_field=bucket_field, 3929 percent=percent, 3930 size=size, 3931 seed=seed, 3932 ) 3933 3934 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3935 return list(iter(self._parse_pivot, None)) or None 3936 3937 def _parse_joins(self) -> t.Iterator[exp.Join]: 3938 return iter(self._parse_join, None) 3939 3940 # https://duckdb.org/docs/sql/statements/pivot 3941 def _parse_simplified_pivot(self) -> exp.Pivot: 3942 def _parse_on() -> t.Optional[exp.Expression]: 3943 this = self._parse_bitwise() 3944 return self._parse_in(this) if self._match(TokenType.IN) else this 3945 3946 this = self._parse_table() 3947 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3948 using = self._match(TokenType.USING) and self._parse_csv( 3949 lambda: self._parse_alias(self._parse_function()) 3950 ) 3951 group = self._parse_group() 3952 return self.expression( 3953 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3954 ) 3955 3956 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3957 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3958 this = self._parse_select_or_expression() 3959 3960 self._match(TokenType.ALIAS) 3961 alias = self._parse_bitwise() 3962 if alias: 3963 if isinstance(alias, exp.Column) and not alias.db: 3964 alias = alias.this 3965 return self.expression(exp.PivotAlias, this=this, alias=alias) 3966 3967 return this 3968 3969 value = self._parse_column() 3970 3971 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3972 self.raise_error("Expecting IN (") 3973 3974 if self._match(TokenType.ANY): 3975 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3976 else: 3977 exprs = self._parse_csv(_parse_aliased_expression) 3978 3979 self._match_r_paren() 3980 return self.expression(exp.In, this=value, expressions=exprs) 3981 3982 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3983 index = self._index 3984 include_nulls = None 3985 3986 if self._match(TokenType.PIVOT): 3987 unpivot = False 3988 elif self._match(TokenType.UNPIVOT): 3989 unpivot = True 3990 3991 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3992 if self._match_text_seq("INCLUDE", "NULLS"): 3993 include_nulls = True 3994 elif self._match_text_seq("EXCLUDE", "NULLS"): 3995 include_nulls = False 3996 else: 3997 return None 3998 3999 expressions = [] 4000 4001 if not self._match(TokenType.L_PAREN): 4002 self._retreat(index) 4003 return None 4004 4005 if unpivot: 4006 expressions = self._parse_csv(self._parse_column) 4007 else: 4008 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4009 4010 if not expressions: 4011 self.raise_error("Failed to parse PIVOT's aggregation list") 4012 4013 if not self._match(TokenType.FOR): 4014 self.raise_error("Expecting FOR") 4015 4016 field = self._parse_pivot_in() 4017 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4018 self._parse_bitwise 4019 ) 4020 4021 self._match_r_paren() 4022 4023 pivot = self.expression( 4024 exp.Pivot, 4025 expressions=expressions, 4026 field=field, 4027 unpivot=unpivot, 4028 include_nulls=include_nulls, 4029 
default_on_null=default_on_null, 4030 ) 4031 4032 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4033 pivot.set("alias", self._parse_table_alias()) 4034 4035 if not unpivot: 4036 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4037 4038 columns: t.List[exp.Expression] = [] 4039 for fld in pivot.args["field"].expressions: 4040 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4041 for name in names: 4042 if self.PREFIXED_PIVOT_COLUMNS: 4043 name = f"{name}_{field_name}" if name else field_name 4044 else: 4045 name = f"{field_name}_{name}" if name else field_name 4046 4047 columns.append(exp.to_identifier(name)) 4048 4049 pivot.set("columns", columns) 4050 4051 return pivot 4052 4053 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4054 return [agg.alias for agg in aggregations] 4055 4056 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4057 if not skip_where_token and not self._match(TokenType.PREWHERE): 4058 return None 4059 4060 return self.expression( 4061 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4062 ) 4063 4064 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4065 if not skip_where_token and not self._match(TokenType.WHERE): 4066 return None 4067 4068 return self.expression( 4069 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4070 ) 4071 4072 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4073 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4074 return None 4075 4076 elements: t.Dict[str, t.Any] = defaultdict(list) 4077 4078 if self._match(TokenType.ALL): 4079 elements["all"] = True 4080 elif self._match(TokenType.DISTINCT): 4081 elements["all"] = False 4082 4083 while True: 4084 index = self._index 4085 4086 elements["expressions"].extend( 4087 self._parse_csv( 4088 lambda: None 4089 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4090 else self._parse_assignment() 4091 ) 4092 ) 4093 4094 before_with_index = self._index 4095 with_prefix = self._match(TokenType.WITH) 4096 4097 if self._match(TokenType.ROLLUP): 4098 elements["rollup"].append( 4099 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4100 ) 4101 elif self._match(TokenType.CUBE): 4102 elements["cube"].append( 4103 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4104 ) 4105 elif self._match(TokenType.GROUPING_SETS): 4106 elements["grouping_sets"].append( 4107 self.expression( 4108 exp.GroupingSets, 4109 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4110 ) 4111 ) 4112 elif self._match_text_seq("TOTALS"): 4113 elements["totals"] = True # type: ignore 4114 4115 if before_with_index <= self._index <= before_with_index + 1: 4116 self._retreat(before_with_index) 4117 break 4118 4119 if index == self._index: 4120 break 4121 4122 return self.expression(exp.Group, **elements) # type: ignore 4123 4124 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4125 return self.expression( 4126 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4127 ) 4128 4129 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4130 if self._match(TokenType.L_PAREN): 4131 grouping_set = self._parse_csv(self._parse_column) 4132 self._match_r_paren() 4133 return self.expression(exp.Tuple, expressions=grouping_set) 4134 4135 
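# Added sketch (illustrative, assumes the public sqlglot API): a grouping set element can
# also be a bare column, so both forms below are handled:
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> sqlglot.parse_one("SELECT a FROM t GROUP BY GROUPING SETS (a, (b, c))").find(exp.GroupingSets)
# is expected to hold a Column for `a` and a Tuple for `(b, c)`.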
return self._parse_column() 4136 4137 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4138 if not skip_having_token and not self._match(TokenType.HAVING): 4139 return None 4140 return self.expression(exp.Having, this=self._parse_assignment()) 4141 4142 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4143 if not self._match(TokenType.QUALIFY): 4144 return None 4145 return self.expression(exp.Qualify, this=self._parse_assignment()) 4146 4147 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4148 if skip_start_token: 4149 start = None 4150 elif self._match(TokenType.START_WITH): 4151 start = self._parse_assignment() 4152 else: 4153 return None 4154 4155 self._match(TokenType.CONNECT_BY) 4156 nocycle = self._match_text_seq("NOCYCLE") 4157 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4158 exp.Prior, this=self._parse_bitwise() 4159 ) 4160 connect = self._parse_assignment() 4161 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4162 4163 if not start and self._match(TokenType.START_WITH): 4164 start = self._parse_assignment() 4165 4166 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4167 4168 def _parse_name_as_expression(self) -> exp.Alias: 4169 return self.expression( 4170 exp.Alias, 4171 alias=self._parse_id_var(any_token=True), 4172 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4173 ) 4174 4175 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4176 if self._match_text_seq("INTERPOLATE"): 4177 return self._parse_wrapped_csv(self._parse_name_as_expression) 4178 return None 4179 4180 def _parse_order( 4181 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4182 ) -> t.Optional[exp.Expression]: 4183 siblings = None 4184 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4185 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4186 return this 4187 4188 siblings = True 4189 4190 return self.expression( 4191 exp.Order, 4192 this=this, 4193 expressions=self._parse_csv(self._parse_ordered), 4194 siblings=siblings, 4195 ) 4196 4197 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4198 if not self._match(token): 4199 return None 4200 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4201 4202 def _parse_ordered( 4203 self, parse_method: t.Optional[t.Callable] = None 4204 ) -> t.Optional[exp.Ordered]: 4205 this = parse_method() if parse_method else self._parse_assignment() 4206 if not this: 4207 return None 4208 4209 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4210 this = exp.var("ALL") 4211 4212 asc = self._match(TokenType.ASC) 4213 desc = self._match(TokenType.DESC) or (asc and False) 4214 4215 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4216 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4217 4218 nulls_first = is_nulls_first or False 4219 explicitly_null_ordered = is_nulls_first or is_nulls_last 4220 4221 if ( 4222 not explicitly_null_ordered 4223 and ( 4224 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4225 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4226 ) 4227 and self.dialect.NULL_ORDERING != "nulls_are_last" 4228 ): 4229 nulls_first = True 4230 4231 if self._match_text_seq("WITH", "FILL"): 4232 with_fill = self.expression( 4233 exp.WithFill, 4234 **{ # type: ignore 4235 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4236 "to": 
self._match_text_seq("TO") and self._parse_bitwise(), 4237 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4238 "interpolate": self._parse_interpolate(), 4239 }, 4240 ) 4241 else: 4242 with_fill = None 4243 4244 return self.expression( 4245 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4246 ) 4247 4248 def _parse_limit( 4249 self, 4250 this: t.Optional[exp.Expression] = None, 4251 top: bool = False, 4252 skip_limit_token: bool = False, 4253 ) -> t.Optional[exp.Expression]: 4254 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4255 comments = self._prev_comments 4256 if top: 4257 limit_paren = self._match(TokenType.L_PAREN) 4258 expression = self._parse_term() if limit_paren else self._parse_number() 4259 4260 if limit_paren: 4261 self._match_r_paren() 4262 else: 4263 expression = self._parse_term() 4264 4265 if self._match(TokenType.COMMA): 4266 offset = expression 4267 expression = self._parse_term() 4268 else: 4269 offset = None 4270 4271 limit_exp = self.expression( 4272 exp.Limit, 4273 this=this, 4274 expression=expression, 4275 offset=offset, 4276 comments=comments, 4277 expressions=self._parse_limit_by(), 4278 ) 4279 4280 return limit_exp 4281 4282 if self._match(TokenType.FETCH): 4283 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4284 direction = self._prev.text.upper() if direction else "FIRST" 4285 4286 count = self._parse_field(tokens=self.FETCH_TOKENS) 4287 percent = self._match(TokenType.PERCENT) 4288 4289 self._match_set((TokenType.ROW, TokenType.ROWS)) 4290 4291 only = self._match_text_seq("ONLY") 4292 with_ties = self._match_text_seq("WITH", "TIES") 4293 4294 if only and with_ties: 4295 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4296 4297 return self.expression( 4298 exp.Fetch, 4299 direction=direction, 4300 count=count, 4301 percent=percent, 4302 with_ties=with_ties, 4303 ) 4304 4305 return this 4306 4307 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4308 if not self._match(TokenType.OFFSET): 4309 return this 4310 4311 count = self._parse_term() 4312 self._match_set((TokenType.ROW, TokenType.ROWS)) 4313 4314 return self.expression( 4315 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4316 ) 4317 4318 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4319 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4320 4321 def _parse_locks(self) -> t.List[exp.Lock]: 4322 locks = [] 4323 while True: 4324 if self._match_text_seq("FOR", "UPDATE"): 4325 update = True 4326 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4327 "LOCK", "IN", "SHARE", "MODE" 4328 ): 4329 update = False 4330 else: 4331 break 4332 4333 expressions = None 4334 if self._match_text_seq("OF"): 4335 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4336 4337 wait: t.Optional[bool | exp.Expression] = None 4338 if self._match_text_seq("NOWAIT"): 4339 wait = True 4340 elif self._match_text_seq("WAIT"): 4341 wait = self._parse_primary() 4342 elif self._match_text_seq("SKIP", "LOCKED"): 4343 wait = False 4344 4345 locks.append( 4346 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4347 ) 4348 4349 return locks 4350 4351 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4352 while this and self._match_set(self.SET_OPERATIONS): 4353 token_type = self._prev.token_type 
4354 4355 if token_type == TokenType.UNION: 4356 operation: t.Type[exp.SetOperation] = exp.Union 4357 elif token_type == TokenType.EXCEPT: 4358 operation = exp.Except 4359 else: 4360 operation = exp.Intersect 4361 4362 comments = self._prev.comments 4363 4364 if self._match(TokenType.DISTINCT): 4365 distinct: t.Optional[bool] = True 4366 elif self._match(TokenType.ALL): 4367 distinct = False 4368 else: 4369 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4370 if distinct is None: 4371 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4372 4373 by_name = self._match_text_seq("BY", "NAME") 4374 expression = self._parse_select(nested=True, parse_set_operation=False) 4375 4376 this = self.expression( 4377 operation, 4378 comments=comments, 4379 this=this, 4380 distinct=distinct, 4381 by_name=by_name, 4382 expression=expression, 4383 ) 4384 4385 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4386 expression = this.expression 4387 4388 if expression: 4389 for arg in self.SET_OP_MODIFIERS: 4390 expr = expression.args.get(arg) 4391 if expr: 4392 this.set(arg, expr.pop()) 4393 4394 return this 4395 4396 def _parse_expression(self) -> t.Optional[exp.Expression]: 4397 return self._parse_alias(self._parse_assignment()) 4398 4399 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4400 this = self._parse_disjunction() 4401 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4402 # This allows us to parse <non-identifier token> := <expr> 4403 this = exp.column( 4404 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4405 ) 4406 4407 while self._match_set(self.ASSIGNMENT): 4408 if isinstance(this, exp.Column) and len(this.parts) == 1: 4409 this = this.this 4410 4411 this = self.expression( 4412 self.ASSIGNMENT[self._prev.token_type], 4413 this=this, 4414 comments=self._prev_comments, 4415 expression=self._parse_assignment(), 4416 ) 4417 4418 return this 4419 4420 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4421 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4422 4423 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4424 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4425 4426 def _parse_equality(self) -> t.Optional[exp.Expression]: 4427 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4428 4429 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4430 return self._parse_tokens(self._parse_range, self.COMPARISON) 4431 4432 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4433 this = this or self._parse_bitwise() 4434 negate = self._match(TokenType.NOT) 4435 4436 if self._match_set(self.RANGE_PARSERS): 4437 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4438 if not expression: 4439 return this 4440 4441 this = expression 4442 elif self._match(TokenType.ISNULL): 4443 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4444 4445 # Postgres supports ISNULL and NOTNULL for conditions. 
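# e.g. `x ISNULL` parses to Is(this=x, expression=Null()), and `x NOTNULL` to the negated form below.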
4446 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4447 if self._match(TokenType.NOTNULL): 4448 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4449 this = self.expression(exp.Not, this=this) 4450 4451 if negate: 4452 this = self._negate_range(this) 4453 4454 if self._match(TokenType.IS): 4455 this = self._parse_is(this) 4456 4457 return this 4458 4459 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4460 if not this: 4461 return this 4462 4463 return self.expression(exp.Not, this=this) 4464 4465 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4466 index = self._index - 1 4467 negate = self._match(TokenType.NOT) 4468 4469 if self._match_text_seq("DISTINCT", "FROM"): 4470 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4471 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4472 4473 if self._match(TokenType.JSON): 4474 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4475 4476 if self._match_text_seq("WITH"): 4477 _with = True 4478 elif self._match_text_seq("WITHOUT"): 4479 _with = False 4480 else: 4481 _with = None 4482 4483 unique = self._match(TokenType.UNIQUE) 4484 self._match_text_seq("KEYS") 4485 expression: t.Optional[exp.Expression] = self.expression( 4486 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4487 ) 4488 else: 4489 expression = self._parse_primary() or self._parse_null() 4490 if not expression: 4491 self._retreat(index) 4492 return None 4493 4494 this = self.expression(exp.Is, this=this, expression=expression) 4495 return self.expression(exp.Not, this=this) if negate else this 4496 4497 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4498 unnest = self._parse_unnest(with_alias=False) 4499 if unnest: 4500 this = self.expression(exp.In, this=this, unnest=unnest) 4501 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4502 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4503 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4504 4505 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4506 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4507 else: 4508 this = self.expression(exp.In, this=this, expressions=expressions) 4509 4510 if matched_l_paren: 4511 self._match_r_paren(this) 4512 elif not self._match(TokenType.R_BRACKET, expression=this): 4513 self.raise_error("Expecting ]") 4514 else: 4515 this = self.expression(exp.In, this=this, field=self._parse_column()) 4516 4517 return this 4518 4519 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4520 low = self._parse_bitwise() 4521 self._match(TokenType.AND) 4522 high = self._parse_bitwise() 4523 return self.expression(exp.Between, this=this, low=low, high=high) 4524 4525 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4526 if not self._match(TokenType.ESCAPE): 4527 return this 4528 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4529 4530 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4531 index = self._index 4532 4533 if not self._match(TokenType.INTERVAL) and match_interval: 4534 return None 4535 4536 if self._match(TokenType.STRING, advance=False): 4537 this = self._parse_primary() 4538 else: 4539 this = self._parse_term() 4540 4541 if not 
this or ( 4542 isinstance(this, exp.Column) 4543 and not this.table 4544 and not this.this.quoted 4545 and this.name.upper() == "IS" 4546 ): 4547 self._retreat(index) 4548 return None 4549 4550 unit = self._parse_function() or ( 4551 not self._match(TokenType.ALIAS, advance=False) 4552 and self._parse_var(any_token=True, upper=True) 4553 ) 4554 4555 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4556 # each INTERVAL expression into this canonical form so it's easy to transpile 4557 if this and this.is_number: 4558 this = exp.Literal.string(this.to_py()) 4559 elif this and this.is_string: 4560 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4561 if len(parts) == 1: 4562 if unit: 4563 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4564 self._retreat(self._index - 1) 4565 4566 this = exp.Literal.string(parts[0][0]) 4567 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4568 4569 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4570 unit = self.expression( 4571 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4572 ) 4573 4574 interval = self.expression(exp.Interval, this=this, unit=unit) 4575 4576 index = self._index 4577 self._match(TokenType.PLUS) 4578 4579 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4580 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4581 return self.expression( 4582 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4583 ) 4584 4585 self._retreat(index) 4586 return interval 4587 4588 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4589 this = self._parse_term() 4590 4591 while True: 4592 if self._match_set(self.BITWISE): 4593 this = self.expression( 4594 self.BITWISE[self._prev.token_type], 4595 this=this, 4596 expression=self._parse_term(), 4597 ) 4598 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4599 this = self.expression( 4600 exp.DPipe, 4601 this=this, 4602 expression=self._parse_term(), 4603 safe=not self.dialect.STRICT_STRING_CONCAT, 4604 ) 4605 elif self._match(TokenType.DQMARK): 4606 this = self.expression( 4607 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4608 ) 4609 elif self._match_pair(TokenType.LT, TokenType.LT): 4610 this = self.expression( 4611 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4612 ) 4613 elif self._match_pair(TokenType.GT, TokenType.GT): 4614 this = self.expression( 4615 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4616 ) 4617 else: 4618 break 4619 4620 return this 4621 4622 def _parse_term(self) -> t.Optional[exp.Expression]: 4623 this = self._parse_factor() 4624 4625 while self._match_set(self.TERM): 4626 klass = self.TERM[self._prev.token_type] 4627 comments = self._prev_comments 4628 expression = self._parse_factor() 4629 4630 this = self.expression(klass, this=this, comments=comments, expression=expression) 4631 4632 if isinstance(this, exp.Collate): 4633 expr = this.expression 4634 4635 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4636 # fallback to Identifier / Var 4637 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4638 ident = expr.this 4639 if isinstance(ident, exp.Identifier): 4640 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4641 4642 return this 4643 4644 def _parse_factor(self) -> t.Optional[exp.Expression]: 4645 parse_method = self._parse_exponent if 
self.EXPONENT else self._parse_unary 4646 this = parse_method() 4647 4648 while self._match_set(self.FACTOR): 4649 klass = self.FACTOR[self._prev.token_type] 4650 comments = self._prev_comments 4651 expression = parse_method() 4652 4653 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4654 self._retreat(self._index - 1) 4655 return this 4656 4657 this = self.expression(klass, this=this, comments=comments, expression=expression) 4658 4659 if isinstance(this, exp.Div): 4660 this.args["typed"] = self.dialect.TYPED_DIVISION 4661 this.args["safe"] = self.dialect.SAFE_DIVISION 4662 4663 return this 4664 4665 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4666 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4667 4668 def _parse_unary(self) -> t.Optional[exp.Expression]: 4669 if self._match_set(self.UNARY_PARSERS): 4670 return self.UNARY_PARSERS[self._prev.token_type](self) 4671 return self._parse_at_time_zone(self._parse_type()) 4672 4673 def _parse_type( 4674 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4675 ) -> t.Optional[exp.Expression]: 4676 interval = parse_interval and self._parse_interval() 4677 if interval: 4678 return interval 4679 4680 index = self._index 4681 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4682 4683 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4684 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4685 if isinstance(data_type, exp.Cast): 4686 # This constructor can contain ops directly after it, for instance struct unnesting: 4687 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4688 return self._parse_column_ops(data_type) 4689 4690 if data_type: 4691 index2 = self._index 4692 this = self._parse_primary() 4693 4694 if isinstance(this, exp.Literal): 4695 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4696 if parser: 4697 return parser(self, this, data_type) 4698 4699 return self.expression(exp.Cast, this=this, to=data_type) 4700 4701 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4702 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4703 # 4704 # If the index difference here is greater than 1, that means the parser itself must have 4705 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4706 # 4707 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4708 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4709 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4710 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4711 # 4712 # In these cases, we don't really want to return the converted type, but instead retreat 4713 # and try to parse a Column or Identifier in the section below.
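# Concretely (added for illustration): for DECIMAL(38, 0) the six tokens DECIMAL ( 38 , 0 )
# are all consumed here, so index2 - index is 6 and the parsed DataType is kept, whereas a
# bare DECIMAL expanded by a TYPE_CONVERTERS callable advances a single token and falls
# through to the column/identifier parse below.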
4714 if data_type.expressions and index2 - index > 1: 4715 self._retreat(index2) 4716 return self._parse_column_ops(data_type) 4717 4718 self._retreat(index) 4719 4720 if fallback_to_identifier: 4721 return self._parse_id_var() 4722 4723 this = self._parse_column() 4724 return this and self._parse_column_ops(this) 4725 4726 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4727 this = self._parse_type() 4728 if not this: 4729 return None 4730 4731 if isinstance(this, exp.Column) and not this.table: 4732 this = exp.var(this.name.upper()) 4733 4734 return self.expression( 4735 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4736 ) 4737 4738 def _parse_types( 4739 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4740 ) -> t.Optional[exp.Expression]: 4741 index = self._index 4742 4743 this: t.Optional[exp.Expression] = None 4744 prefix = self._match_text_seq("SYSUDTLIB", ".") 4745 4746 if not self._match_set(self.TYPE_TOKENS): 4747 identifier = allow_identifiers and self._parse_id_var( 4748 any_token=False, tokens=(TokenType.VAR,) 4749 ) 4750 if isinstance(identifier, exp.Identifier): 4751 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4752 4753 if len(tokens) != 1: 4754 self.raise_error("Unexpected identifier", self._prev) 4755 4756 if tokens[0].token_type in self.TYPE_TOKENS: 4757 self._prev = tokens[0] 4758 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4759 type_name = identifier.name 4760 4761 while self._match(TokenType.DOT): 4762 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4763 4764 this = exp.DataType.build(type_name, udt=True) 4765 else: 4766 self._retreat(self._index - 1) 4767 return None 4768 else: 4769 return None 4770 4771 type_token = self._prev.token_type 4772 4773 if type_token == TokenType.PSEUDO_TYPE: 4774 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4775 4776 if type_token == TokenType.OBJECT_IDENTIFIER: 4777 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4778 4779 # https://materialize.com/docs/sql/types/map/ 4780 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4781 key_type = self._parse_types( 4782 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4783 ) 4784 if not self._match(TokenType.FARROW): 4785 self._retreat(index) 4786 return None 4787 4788 value_type = self._parse_types( 4789 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4790 ) 4791 if not self._match(TokenType.R_BRACKET): 4792 self._retreat(index) 4793 return None 4794 4795 return exp.DataType( 4796 this=exp.DataType.Type.MAP, 4797 expressions=[key_type, value_type], 4798 nested=True, 4799 prefix=prefix, 4800 ) 4801 4802 nested = type_token in self.NESTED_TYPE_TOKENS 4803 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4804 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4805 expressions = None 4806 maybe_func = False 4807 4808 if self._match(TokenType.L_PAREN): 4809 if is_struct: 4810 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4811 elif nested: 4812 expressions = self._parse_csv( 4813 lambda: self._parse_types( 4814 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4815 ) 4816 ) 4817 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4818 this = expressions[0] 4819 this.set("nullable", True) 4820 self._match_r_paren() 4821 return this 4822 elif type_token in self.ENUM_TYPE_TOKENS: 4823 
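# Added note (illustrative): e.g. MySQL's ENUM('a', 'b'), where each enum value is parsed as an equality expression.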
expressions = self._parse_csv(self._parse_equality) 4824 elif is_aggregate: 4825 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4826 any_token=False, tokens=(TokenType.VAR,) 4827 ) 4828 if not func_or_ident or not self._match(TokenType.COMMA): 4829 return None 4830 expressions = self._parse_csv( 4831 lambda: self._parse_types( 4832 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4833 ) 4834 ) 4835 expressions.insert(0, func_or_ident) 4836 else: 4837 expressions = self._parse_csv(self._parse_type_size) 4838 4839 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4840 if type_token == TokenType.VECTOR and len(expressions) == 2: 4841 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4842 4843 if not expressions or not self._match(TokenType.R_PAREN): 4844 self._retreat(index) 4845 return None 4846 4847 maybe_func = True 4848 4849 values: t.Optional[t.List[exp.Expression]] = None 4850 4851 if nested and self._match(TokenType.LT): 4852 if is_struct: 4853 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4854 else: 4855 expressions = self._parse_csv( 4856 lambda: self._parse_types( 4857 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4858 ) 4859 ) 4860 4861 if not self._match(TokenType.GT): 4862 self.raise_error("Expecting >") 4863 4864 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4865 values = self._parse_csv(self._parse_assignment) 4866 if not values and is_struct: 4867 values = None 4868 self._retreat(self._index - 1) 4869 else: 4870 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4871 4872 if type_token in self.TIMESTAMPS: 4873 if self._match_text_seq("WITH", "TIME", "ZONE"): 4874 maybe_func = False 4875 tz_type = ( 4876 exp.DataType.Type.TIMETZ 4877 if type_token in self.TIMES 4878 else exp.DataType.Type.TIMESTAMPTZ 4879 ) 4880 this = exp.DataType(this=tz_type, expressions=expressions) 4881 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4882 maybe_func = False 4883 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4884 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4885 maybe_func = False 4886 elif type_token == TokenType.INTERVAL: 4887 unit = self._parse_var(upper=True) 4888 if unit: 4889 if self._match_text_seq("TO"): 4890 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4891 4892 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4893 else: 4894 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4895 4896 if maybe_func and check_func: 4897 index2 = self._index 4898 peek = self._parse_string() 4899 4900 if not peek: 4901 self._retreat(index) 4902 return None 4903 4904 self._retreat(index2) 4905 4906 if not this: 4907 if self._match_text_seq("UNSIGNED"): 4908 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4909 if not unsigned_type_token: 4910 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4911 4912 type_token = unsigned_type_token or type_token 4913 4914 this = exp.DataType( 4915 this=exp.DataType.Type[type_token.value], 4916 expressions=expressions, 4917 nested=nested, 4918 prefix=prefix, 4919 ) 4920 4921 # Empty arrays/structs are allowed 4922 if values is not None: 4923 cls = exp.Struct if is_struct else exp.Array 4924 this = exp.cast(cls(expressions=values), this, copy=False) 4925 4926 elif expressions: 4927 this.set("expressions", 
expressions) 4928 4929 # https://materialize.com/docs/sql/types/list/#type-name 4930 while self._match(TokenType.LIST): 4931 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4932 4933 index = self._index 4934 4935 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4936 matched_array = self._match(TokenType.ARRAY) 4937 4938 while self._curr: 4939 datatype_token = self._prev.token_type 4940 matched_l_bracket = self._match(TokenType.L_BRACKET) 4941 if not matched_l_bracket and not matched_array: 4942 break 4943 4944 matched_array = False 4945 values = self._parse_csv(self._parse_assignment) or None 4946 if ( 4947 values 4948 and not schema 4949 and ( 4950 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4951 ) 4952 ): 4953 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4954 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4955 self._retreat(index) 4956 break 4957 4958 this = exp.DataType( 4959 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4960 ) 4961 self._match(TokenType.R_BRACKET) 4962 4963 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4964 converter = self.TYPE_CONVERTERS.get(this.this) 4965 if converter: 4966 this = converter(t.cast(exp.DataType, this)) 4967 4968 return this 4969 4970 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4971 index = self._index 4972 4973 if ( 4974 self._curr 4975 and self._next 4976 and self._curr.token_type in self.TYPE_TOKENS 4977 and self._next.token_type in self.TYPE_TOKENS 4978 ): 4979 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4980 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4981 this = self._parse_id_var() 4982 else: 4983 this = ( 4984 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4985 or self._parse_id_var() 4986 ) 4987 4988 self._match(TokenType.COLON) 4989 4990 if ( 4991 type_required 4992 and not isinstance(this, exp.DataType) 4993 and not self._match_set(self.TYPE_TOKENS, advance=False) 4994 ): 4995 self._retreat(index) 4996 return self._parse_types() 4997 4998 return self._parse_column_def(this) 4999 5000 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5001 if not self._match_text_seq("AT", "TIME", "ZONE"): 5002 return this 5003 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5004 5005 def _parse_column(self) -> t.Optional[exp.Expression]: 5006 this = self._parse_column_reference() 5007 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5008 5009 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5010 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5011 5012 return column 5013 5014 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5015 this = self._parse_field() 5016 if ( 5017 not this 5018 and self._match(TokenType.VALUES, advance=False) 5019 and self.VALUES_FOLLOWED_BY_PAREN 5020 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5021 ): 5022 this = self._parse_id_var() 5023 5024 if isinstance(this, exp.Identifier): 5025 # We bubble up comments from the Identifier to the Column 5026 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5027 5028 return this 5029 5030 def _parse_colon_as_variant_extract( 5031 self, this: t.Optional[exp.Expression] 5032 ) -> t.Optional[exp.Expression]: 5033 casts = [] 5034 json_path = [] 5035 escape = None 5036 5037 while self._match(TokenType.COLON): 5038 start_index = self._index 5039 5040 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5041 path = self._parse_column_ops( 5042 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5043 ) 5044 5045 # The cast :: operator has a lower precedence than the extraction operator :, so 5046 # we rearrange the AST appropriately to avoid casting the JSON path 5047 while isinstance(path, exp.Cast): 5048 casts.append(path.to) 5049 path = path.this 5050 5051 if casts: 5052 dcolon_offset = next( 5053 i 5054 for i, t in enumerate(self._tokens[start_index:]) 5055 if t.token_type == TokenType.DCOLON 5056 ) 5057 end_token = self._tokens[start_index + dcolon_offset - 1] 5058 else: 5059 end_token = self._prev 5060 5061 if path: 5062 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5063 # it'll roundtrip to a string literal in GET_PATH 5064 if isinstance(path, exp.Identifier) and path.quoted: 5065 escape = True 5066 5067 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5068 5069 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5070 # Databricks transforms it back to the colon/dot notation 5071 if json_path: 5072 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5073 5074 if json_path_expr: 5075 json_path_expr.set("escape", escape) 5076 5077 this = self.expression( 5078 exp.JSONExtract, 5079 this=this, 5080 expression=json_path_expr, 5081 variant_extract=True, 5082 ) 5083 5084 while casts: 5085 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5086 5087 return this 5088 5089 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5090 return self._parse_types() 5091 5092 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5093 this = self._parse_bracket(this) 5094 5095 while self._match_set(self.COLUMN_OPERATORS): 5096 op_token = self._prev.token_type 5097 op = self.COLUMN_OPERATORS.get(op_token) 5098 5099 if op_token == TokenType.DCOLON: 5100 field = self._parse_dcolon() 5101 if not field: 5102 self.raise_error("Expected type") 5103 elif op and self._curr: 5104 field = self._parse_column_reference() or self._parse_bracket() 5105 else: 5106 field = self._parse_field(any_token=True, anonymous_func=True) 5107 5108 if isinstance(field, exp.Func) and this: 5109 # bigquery allows function calls like x.y.count(...) 5110 # SAFE.SUBSTR(...) 5111 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5112 this = exp.replace_tree( 5113 this, 5114 lambda n: ( 5115 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5116 if n.table 5117 else n.this 5118 ) 5119 if isinstance(n, exp.Column) 5120 else n, 5121 ) 5122 5123 if op: 5124 this = op(self, this, field) 5125 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5126 this = self.expression( 5127 exp.Column, 5128 comments=this.comments, 5129 this=field, 5130 table=this.this, 5131 db=this.args.get("table"), 5132 catalog=this.args.get("db"), 5133 ) 5134 else: 5135 this = self.expression(exp.Dot, this=this, expression=field) 5136 5137 this = self._parse_bracket(this) 5138 5139 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5140 5141 def _parse_primary(self) -> t.Optional[exp.Expression]: 5142 if self._match_set(self.PRIMARY_PARSERS): 5143 token_type = self._prev.token_type 5144 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5145 5146 if token_type == TokenType.STRING: 5147 expressions = [primary] 5148 while self._match(TokenType.STRING): 5149 expressions.append(exp.Literal.string(self._prev.text)) 5150 5151 if len(expressions) > 1: 5152 return self.expression(exp.Concat, expressions=expressions) 5153 5154 return primary 5155 5156 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5157 return exp.Literal.number(f"0.{self._prev.text}") 5158 5159 if self._match(TokenType.L_PAREN): 5160 comments = self._prev_comments 5161 query = self._parse_select() 5162 5163 if query: 5164 expressions = [query] 5165 else: 5166 expressions = self._parse_expressions() 5167 5168 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5169 5170 if not this and self._match(TokenType.R_PAREN, advance=False): 5171 this 

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
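
    # A quick sketch of the {fn ...} ODBC escape handling above (hedged; the braces are
    # consumed during parsing, so the inner call surfaces as a regular function node):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}")
    #     >>> ast.find(exp.Concat) is not None
    #     True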

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()
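
    # Hedged sketch of named-argument normalization: `a := 1` style arguments surface as
    # PropertyEQ nodes (via the assignment parser and, for functions registered in
    # FUNCTIONS_WITH_ALIASED_ARGS, the _kv_to_prop_eq pass above). STRUCT_PACK being such
    # a function in the DuckDB dialect is an assumption here:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("SELECT STRUCT_PACK(a := 1)", read="duckdb")
    #     >>> ast.find(exp.PropertyEQ) is not None
    #     True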

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_field()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
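
    # Minimal sketch of the lambda parsing above (LAMBDAS handles the `->` arrow form;
    # DuckDB's LIST_TRANSFORM is used purely as an example carrier):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("SELECT LIST_TRANSFORM(xs, x -> x + 1) FROM t", read="duckdb")
    #     >>> ast.find(exp.Lambda) is not None
    #     True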

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None
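
    # Hedged sketch of the GENERATED ... AS IDENTITY handling above (exact arg layout may
    # differ slightly between versions):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))"
    #     >>> ident = sqlglot.parse_one(ddl, read="postgres").find(exp.GeneratedAsIdentityColumnConstraint)
    #     >>> (ident.this, ident.args["start"].sql(), ident.args["increment"].sql())
    #     (True, '1', '2')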

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
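
    # Minimal sketch of REFERENCES plus key-constraint options (hedged; options are kept
    # as plain strings on the Reference node):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = "CREATE TABLE t (a INT REFERENCES p (id) ON DELETE CASCADE)"
    #     >>> sqlglot.parse_one(ddl).find(exp.Reference).args["options"]
    #     ['ON DELETE CASCADE']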
action = "NO ACTION" 5698 elif self._match(TokenType.SET): 5699 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5700 action = "SET " + self._prev.text.upper() 5701 else: 5702 self._advance() 5703 action = self._prev.text.upper() 5704 5705 options[kind] = action 5706 5707 return self.expression( 5708 exp.ForeignKey, 5709 expressions=expressions, 5710 reference=reference, 5711 **options, # type: ignore 5712 ) 5713 5714 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5715 return self._parse_field() 5716 5717 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5718 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5719 self._retreat(self._index - 1) 5720 return None 5721 5722 id_vars = self._parse_wrapped_id_vars() 5723 return self.expression( 5724 exp.PeriodForSystemTimeConstraint, 5725 this=seq_get(id_vars, 0), 5726 expression=seq_get(id_vars, 1), 5727 ) 5728 5729 def _parse_primary_key( 5730 self, wrapped_optional: bool = False, in_props: bool = False 5731 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5732 desc = ( 5733 self._match_set((TokenType.ASC, TokenType.DESC)) 5734 and self._prev.token_type == TokenType.DESC 5735 ) 5736 5737 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5738 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5739 5740 expressions = self._parse_wrapped_csv( 5741 self._parse_primary_key_part, optional=wrapped_optional 5742 ) 5743 options = self._parse_key_constraint_options() 5744 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5745 5746 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5747 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5748 5749 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5750 """ 5751 Parses a datetime column in ODBC format. We parse the column into the corresponding 5752 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5753 same as we did for `DATE('yyyy-mm-dd')`. 

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
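
    # Hedged sketch of the ODBC datetime literal path in _parse_bracket above, assuming
    # the dialect tokenizes braces and registers "d" in ODBC_DATETIME_LITERALS:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("SELECT {d '2024-01-31'}", read="tsql")
    #     >>> ast.find(exp.Date) is not None
    #     True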

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
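
    # Hedged sketch of CAST ... FORMAT above: a temporal target type plus a FORMAT string
    # is rewritten into StrToDate/StrToTime (Teradata-style syntax assumed):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("SELECT CAST(x AS DATE FORMAT 'YYYY-MM-DD') FROM t", read="teradata")
    #     >>> ast.find(exp.StrToDate) is not None
    #     True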

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
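
    # Minimal sketch of the WITHIN GROUP normalization above: the call lands in a
    # GroupConcat node rather than a WithinGroup wrapper.
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> sql = "SELECT STRING_AGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t"
    #     >>> parse_one(sql, read="postgres").find(exp.GroupConcat) is not None
    #     True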
6004 """ 6005 args = self._parse_csv(self._parse_assignment) 6006 6007 if len(args) < 3: 6008 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6009 6010 expression, *expressions = args 6011 if not expression: 6012 return None 6013 6014 ifs = [] 6015 for search, result in zip(expressions[::2], expressions[1::2]): 6016 if not search or not result: 6017 return None 6018 6019 if isinstance(search, exp.Literal): 6020 ifs.append( 6021 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6022 ) 6023 elif isinstance(search, exp.Null): 6024 ifs.append( 6025 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6026 ) 6027 else: 6028 cond = exp.or_( 6029 exp.EQ(this=expression.copy(), expression=search), 6030 exp.and_( 6031 exp.Is(this=expression.copy(), expression=exp.Null()), 6032 exp.Is(this=search.copy(), expression=exp.Null()), 6033 copy=False, 6034 ), 6035 copy=False, 6036 ) 6037 ifs.append(exp.If(this=cond, true=result)) 6038 6039 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6040 6041 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6042 self._match_text_seq("KEY") 6043 key = self._parse_column() 6044 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6045 self._match_text_seq("VALUE") 6046 value = self._parse_bitwise() 6047 6048 if not key and not value: 6049 return None 6050 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6051 6052 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6053 if not this or not self._match_text_seq("FORMAT", "JSON"): 6054 return this 6055 6056 return self.expression(exp.FormatJson, this=this) 6057 6058 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6059 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6060 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6061 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6062 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6063 else: 6064 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6065 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6066 6067 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6068 6069 if not empty and not error and not null: 6070 return None 6071 6072 return self.expression( 6073 exp.OnCondition, 6074 empty=empty, 6075 error=error, 6076 null=null, 6077 ) 6078 6079 def _parse_on_handling( 6080 self, on: str, *values: str 6081 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6082 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6083 for value in values: 6084 if self._match_text_seq(value, "ON", on): 6085 return f"{value} ON {on}" 6086 6087 index = self._index 6088 if self._match(TokenType.DEFAULT): 6089 default_value = self._parse_bitwise() 6090 if self._match_text_seq("ON", on): 6091 return default_value 6092 6093 self._retreat(index) 6094 6095 return None 6096 6097 @t.overload 6098 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6099 6100 @t.overload 6101 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
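
    # Minimal sketch of the MySQL full-text syntax handled by _parse_match_against above:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> ast = parse_one("SELECT MATCH(a, b) AGAINST('q' IN BOOLEAN MODE) FROM t", read="mysql")
    #     >>> ast.find(exp.MatchAgainst).args["modifier"]
    #     'IN BOOLEAN MODE'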

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
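
    # Hedged sketch of the Postgres-style SUBSTRING form handled above (the default
    # generator is assumed to emit the comma form; output may differ slightly by version):
    #
    #     >>> from sqlglot import parse_one
    #     >>> parse_one("SELECT SUBSTRING(s FROM 2 FOR 3)", read="postgres").sql()
    #     'SELECT SUBSTRING(s, 2, 3)'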

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
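
    # Hedged sketch of the IGNORE NULLS normalization described in _parse_window below:
    # the post-OVER form ends up with IgnoreNulls wrapping the aggregate function.
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> ast = parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t", read="snowflake")
    #     >>> isinstance(ast.find(exp.Window).this, exp.IgnoreNulls)
    #     True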

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this
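
    # Minimal sketch of the frame parsing done by _parse_window/_parse_window_spec above:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> sql = "SELECT SUM(x) OVER (ORDER BY y ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    #     >>> spec = parse_one(sql).find(exp.WindowSpec)
    #     >>> (spec.args["kind"], spec.args["start"], spec.args["end"])
    #     ('ROWS', 'UNBOUNDED', 'CURRENT ROW')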

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)
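
    # Hedged sketch of the ADD COLUMN path above, including the FIRST/AFTER position:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> pos = parse_one("ALTER TABLE t ADD COLUMN c INT AFTER b", read="mysql").find(exp.ColumnPosition)
    #     >>> (pos.args["position"], pos.this.sql())
    #     ('AFTER', 'b')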

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))
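
    # Minimal sketch of the RENAME handling above:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> rename = parse_one("ALTER TABLE t RENAME COLUMN a TO b").find(exp.RenameColumn)
    #     >>> (rename.this.sql(), rename.args["to"].sql())
    #     ('a', 'b')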

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)
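
_parse_merge and _parse_when_matched together cover the WHEN [NOT] MATCHED [BY SOURCE | BY TARGET] matrix. A minimal sketch of the resulting tree (in this version of the parser, the When nodes land directly in Merge.expressions):

    import sqlglot
    from sqlglot import exp

    merge = sqlglot.parse_one(
        "MERGE INTO target AS t USING source AS s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )
    assert isinstance(merge, exp.Merge)
    print([w.args["matched"] for w in merge.expressions])  # [True, False]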
self._match_text_seq("TRANSACTION"): 6983 return self._parse_set_transaction(global_=kind == "GLOBAL") 6984 6985 left = self._parse_primary() or self._parse_column() 6986 assignment_delimiter = self._match_texts(("=", "TO")) 6987 6988 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6989 self._retreat(index) 6990 return None 6991 6992 right = self._parse_statement() or self._parse_id_var() 6993 if isinstance(right, (exp.Column, exp.Identifier)): 6994 right = exp.var(right.name) 6995 6996 this = self.expression(exp.EQ, this=left, expression=right) 6997 return self.expression(exp.SetItem, this=this, kind=kind) 6998 6999 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7000 self._match_text_seq("TRANSACTION") 7001 characteristics = self._parse_csv( 7002 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7003 ) 7004 return self.expression( 7005 exp.SetItem, 7006 expressions=characteristics, 7007 kind="TRANSACTION", 7008 **{"global": global_}, # type: ignore 7009 ) 7010 7011 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7012 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7013 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7014 7015 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7016 index = self._index 7017 set_ = self.expression( 7018 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7019 ) 7020 7021 if self._curr: 7022 self._retreat(index) 7023 return self._parse_as_command(self._prev) 7024 7025 return set_ 7026 7027 def _parse_var_from_options( 7028 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7029 ) -> t.Optional[exp.Var]: 7030 start = self._curr 7031 if not start: 7032 return None 7033 7034 option = start.text.upper() 7035 continuations = options.get(option) 7036 7037 index = self._index 7038 self._advance() 7039 for keywords in continuations or []: 7040 if isinstance(keywords, str): 7041 keywords = (keywords,) 7042 7043 if self._match_text_seq(*keywords): 7044 option = f"{option} {' '.join(keywords)}" 7045 break 7046 else: 7047 if continuations or continuations is None: 7048 if raise_unmatched: 7049 self.raise_error(f"Unknown option {option}") 7050 7051 self._retreat(index) 7052 return None 7053 7054 return exp.var(option) 7055 7056 def _parse_as_command(self, start: Token) -> exp.Command: 7057 while self._curr: 7058 self._advance() 7059 text = self._find_sql(start, self._prev) 7060 size = len(start.text) 7061 self._warn_unsupported() 7062 return exp.Command(this=text[:size], expression=text[size:]) 7063 7064 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7065 settings = [] 7066 7067 self._match_l_paren() 7068 kind = self._parse_id_var() 7069 7070 if self._match(TokenType.L_PAREN): 7071 while True: 7072 key = self._parse_id_var() 7073 value = self._parse_primary() 7074 7075 if not key and value is None: 7076 break 7077 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7078 self._match(TokenType.R_PAREN) 7079 7080 self._match_r_paren() 7081 7082 return self.expression( 7083 exp.DictProperty, 7084 this=this, 7085 kind=kind.this if kind else None, 7086 settings=settings, 7087 ) 7088 7089 def _parse_dict_range(self, this: str) -> exp.DictRange: 7090 self._match_l_paren() 7091 has_min = self._match_text_seq("MIN") 7092 if has_min: 7093 min = self._parse_var() or self._parse_primary() 7094 self._match_text_seq("MAX") 7095 max = 

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")
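
The _match* helpers above are the only way the parser consumes tokens: they return a truthy result and advance on success, or None and stay put, so they compose naturally in and/or chains. They also make it easy to drive a Parser over a raw token stream directly, which is roughly what sqlglot.parse_one does under the hood:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    tokens = Tokenizer().tokenize("SELECT 1")
    [select] = Parser().parse(tokens)
    print(select.sql())  # SELECT 1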

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts
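
_parse_truncate_table shows the fallback idiom used throughout this file: if unconsumed tokens remain, the original text is preserved verbatim as an exp.Command rather than failing. A sketch of the happy path under the default dialect:

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY")
    assert isinstance(ast, exp.TruncateTable)
    assert ast.args["identity"] == "RESTART"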
self._match_text_seq("FORMAT_NAME", "="): 7354 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7355 # so we parse it separately to use _parse_field() 7356 prop = self.expression( 7357 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7358 ) 7359 opts.append(prop) 7360 else: 7361 opts.append(self._parse_property()) 7362 7363 self._match(TokenType.COMMA) 7364 7365 return opts 7366 7367 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7368 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7369 7370 options = [] 7371 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7372 option = self._parse_var(any_token=True) 7373 prev = self._prev.text.upper() 7374 7375 # Different dialects might separate options and values by white space, "=" and "AS" 7376 self._match(TokenType.EQ) 7377 self._match(TokenType.ALIAS) 7378 7379 param = self.expression(exp.CopyParameter, this=option) 7380 7381 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7382 TokenType.L_PAREN, advance=False 7383 ): 7384 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7385 param.set("expressions", self._parse_wrapped_options()) 7386 elif prev == "FILE_FORMAT": 7387 # T-SQL's external file format case 7388 param.set("expression", self._parse_field()) 7389 else: 7390 param.set("expression", self._parse_unquoted_field()) 7391 7392 options.append(param) 7393 self._match(sep) 7394 7395 return options 7396 7397 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7398 expr = self.expression(exp.Credentials) 7399 7400 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7401 expr.set("storage", self._parse_field()) 7402 if self._match_text_seq("CREDENTIALS"): 7403 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7404 creds = ( 7405 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7406 ) 7407 expr.set("credentials", creds) 7408 if self._match_text_seq("ENCRYPTION"): 7409 expr.set("encryption", self._parse_wrapped_options()) 7410 if self._match_text_seq("IAM_ROLE"): 7411 expr.set("iam_role", self._parse_field()) 7412 if self._match_text_seq("REGION"): 7413 expr.set("region", self._parse_field()) 7414 7415 return expr 7416 7417 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7418 return self._parse_field() 7419 7420 def _parse_copy(self) -> exp.Copy | exp.Command: 7421 start = self._prev 7422 7423 self._match(TokenType.INTO) 7424 7425 this = ( 7426 self._parse_select(nested=True, parse_subquery_alias=False) 7427 if self._match(TokenType.L_PAREN, advance=False) 7428 else self._parse_table(schema=True) 7429 ) 7430 7431 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7432 7433 files = self._parse_csv(self._parse_file_location) 7434 credentials = self._parse_credentials() 7435 7436 self._match_text_seq("WITH") 7437 7438 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7439 7440 # Fallback case 7441 if self._curr: 7442 return self._parse_as_command(start) 7443 7444 return self.expression( 7445 exp.Copy, 7446 this=this, 7447 kind=kind, 7448 credentials=credentials, 7449 files=files, 7450 params=params, 7451 ) 7452 7453 def _parse_normalize(self) -> exp.Normalize: 7454 return self.expression( 7455 exp.Normalize, 7456 this=self._parse_bitwise(), 7457 form=self._match(TokenType.COMMA) and self._parse_var(), 7458 ) 7459 7460 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7461 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7462 this = self._parse_function() 7463 if isinstance(this, exp.Columns): 7464 this.set("unpack", True) 7465 return this 7466 7467 return self.expression( 7468 exp.Star, 7469 **{ # type: ignore 7470 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7471 "replace": self._parse_star_op("REPLACE"), 7472 "rename": self._parse_star_op("RENAME"), 7473 }, 7474 ) 7475 7476 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7477 privilege_parts = [] 7478 7479 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7480 # (end of privilege list) or L_PAREN (start of column list) are met 7481 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7482 privilege_parts.append(self._curr.text.upper()) 7483 self._advance() 7484 7485 this = exp.var(" ".join(privilege_parts)) 7486 expressions = ( 7487 self._parse_wrapped_csv(self._parse_column) 7488 if self._match(TokenType.L_PAREN, advance=False) 7489 else None 7490 ) 7491 7492 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7493 7494 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7495 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7496 principal = self._parse_id_var() 7497 7498 if not principal: 7499 return None 7500 7501 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7502 7503 def _parse_grant(self) -> exp.Grant | exp.Command: 7504 start = self._prev 7505 7506 privileges = self._parse_csv(self._parse_grant_privilege) 7507 7508 self._match(TokenType.ON) 7509 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7510 7511 # Attempt to parse the securable e.g. MySQL allows names 7512 # such as "foo.*", "*.*" which are not easily parseable yet 7513 securable = self._try_parse(self._parse_table_parts) 7514 7515 if not securable or not self._match_text_seq("TO"): 7516 return self._parse_as_command(start) 7517 7518 principals = self._parse_csv(self._parse_grant_principal) 7519 7520 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7521 7522 if self._curr: 7523 return self._parse_as_command(start) 7524 7525 return self.expression( 7526 exp.Grant, 7527 privileges=privileges, 7528 kind=kind, 7529 securable=securable, 7530 principals=principals, 7531 grant_option=grant_option, 7532 ) 7533 7534 def _parse_overlay(self) -> exp.Overlay: 7535 return self.expression( 7536 exp.Overlay, 7537 **{ # type: ignore 7538 "this": self._parse_bitwise(), 7539 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7540 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7541 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7542 }, 7543 )
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 
to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME64, 337 TokenType.DATE, 338 TokenType.DATE32, 339 TokenType.INT4RANGE, 340 TokenType.INT4MULTIRANGE, 341 TokenType.INT8RANGE, 342 TokenType.INT8MULTIRANGE, 343 TokenType.NUMRANGE, 344 TokenType.NUMMULTIRANGE, 345 TokenType.TSRANGE, 346 TokenType.TSMULTIRANGE, 347 TokenType.TSTZRANGE, 348 TokenType.TSTZMULTIRANGE, 349 TokenType.DATERANGE, 350 TokenType.DATEMULTIRANGE, 351 TokenType.DECIMAL, 352 TokenType.DECIMAL32, 353 TokenType.DECIMAL64, 354 TokenType.DECIMAL128, 355 TokenType.UDECIMAL, 356 TokenType.BIGDECIMAL, 357 TokenType.UUID, 358 TokenType.GEOGRAPHY, 359 TokenType.GEOMETRY, 360 TokenType.POINT, 361 TokenType.RING, 362 TokenType.LINESTRING, 363 TokenType.MULTILINESTRING, 364 TokenType.POLYGON, 365 TokenType.MULTIPOLYGON, 366 TokenType.HLLSKETCH, 367 TokenType.HSTORE, 368 TokenType.PSEUDO_TYPE, 369 TokenType.SUPER, 370 TokenType.SERIAL, 371 TokenType.SMALLSERIAL, 372 TokenType.BIGSERIAL, 373 TokenType.XML, 374 TokenType.YEAR, 375 TokenType.UNIQUEIDENTIFIER, 376 TokenType.USERDEFINED, 377 TokenType.MONEY, 378 TokenType.SMALLMONEY, 379 TokenType.ROWVERSION, 380 TokenType.IMAGE, 381 TokenType.VARIANT, 382 TokenType.VECTOR, 383 TokenType.OBJECT, 384 TokenType.OBJECT_IDENTIFIER, 385 TokenType.INET, 386 TokenType.IPADDRESS, 387 TokenType.IPPREFIX, 388 TokenType.IPV4, 389 TokenType.IPV6, 390 TokenType.UNKNOWN, 391 TokenType.NULL, 
392 TokenType.NAME, 393 TokenType.TDIGEST, 394 *ENUM_TYPE_TOKENS, 395 *NESTED_TYPE_TOKENS, 396 *AGGREGATE_TYPE_TOKENS, 397 } 398 399 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 400 TokenType.BIGINT: TokenType.UBIGINT, 401 TokenType.INT: TokenType.UINT, 402 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 403 TokenType.SMALLINT: TokenType.USMALLINT, 404 TokenType.TINYINT: TokenType.UTINYINT, 405 TokenType.DECIMAL: TokenType.UDECIMAL, 406 } 407 408 SUBQUERY_PREDICATES = { 409 TokenType.ANY: exp.Any, 410 TokenType.ALL: exp.All, 411 TokenType.EXISTS: exp.Exists, 412 TokenType.SOME: exp.Any, 413 } 414 415 RESERVED_TOKENS = { 416 *Tokenizer.SINGLE_TOKENS.values(), 417 TokenType.SELECT, 418 } - {TokenType.IDENTIFIER} 419 420 DB_CREATABLES = { 421 TokenType.DATABASE, 422 TokenType.DICTIONARY, 423 TokenType.MODEL, 424 TokenType.SCHEMA, 425 TokenType.SEQUENCE, 426 TokenType.STORAGE_INTEGRATION, 427 TokenType.TABLE, 428 TokenType.TAG, 429 TokenType.VIEW, 430 TokenType.WAREHOUSE, 431 TokenType.STREAMLIT, 432 } 433 434 CREATABLES = { 435 TokenType.COLUMN, 436 TokenType.CONSTRAINT, 437 TokenType.FOREIGN_KEY, 438 TokenType.FUNCTION, 439 TokenType.INDEX, 440 TokenType.PROCEDURE, 441 *DB_CREATABLES, 442 } 443 444 ALTERABLES = { 445 TokenType.INDEX, 446 TokenType.TABLE, 447 TokenType.VIEW, 448 } 449 450 # Tokens that can represent identifiers 451 ID_VAR_TOKENS = { 452 TokenType.ALL, 453 TokenType.VAR, 454 TokenType.ANTI, 455 TokenType.APPLY, 456 TokenType.ASC, 457 TokenType.ASOF, 458 TokenType.AUTO_INCREMENT, 459 TokenType.BEGIN, 460 TokenType.BPCHAR, 461 TokenType.CACHE, 462 TokenType.CASE, 463 TokenType.COLLATE, 464 TokenType.COMMAND, 465 TokenType.COMMENT, 466 TokenType.COMMIT, 467 TokenType.CONSTRAINT, 468 TokenType.COPY, 469 TokenType.CUBE, 470 TokenType.DEFAULT, 471 TokenType.DELETE, 472 TokenType.DESC, 473 TokenType.DESCRIBE, 474 TokenType.DICTIONARY, 475 TokenType.DIV, 476 TokenType.END, 477 TokenType.EXECUTE, 478 TokenType.ESCAPE, 479 TokenType.FALSE, 480 TokenType.FIRST, 481 TokenType.FILTER, 482 TokenType.FINAL, 483 TokenType.FORMAT, 484 TokenType.FULL, 485 TokenType.IDENTIFIER, 486 TokenType.IS, 487 TokenType.ISNULL, 488 TokenType.INTERVAL, 489 TokenType.KEEP, 490 TokenType.KILL, 491 TokenType.LEFT, 492 TokenType.LOAD, 493 TokenType.MERGE, 494 TokenType.NATURAL, 495 TokenType.NEXT, 496 TokenType.OFFSET, 497 TokenType.OPERATOR, 498 TokenType.ORDINALITY, 499 TokenType.OVERLAPS, 500 TokenType.OVERWRITE, 501 TokenType.PARTITION, 502 TokenType.PERCENT, 503 TokenType.PIVOT, 504 TokenType.PRAGMA, 505 TokenType.RANGE, 506 TokenType.RECURSIVE, 507 TokenType.REFERENCES, 508 TokenType.REFRESH, 509 TokenType.RENAME, 510 TokenType.REPLACE, 511 TokenType.RIGHT, 512 TokenType.ROLLUP, 513 TokenType.ROW, 514 TokenType.ROWS, 515 TokenType.SEMI, 516 TokenType.SET, 517 TokenType.SETTINGS, 518 TokenType.SHOW, 519 TokenType.TEMPORARY, 520 TokenType.TOP, 521 TokenType.TRUE, 522 TokenType.TRUNCATE, 523 TokenType.UNIQUE, 524 TokenType.UNNEST, 525 TokenType.UNPIVOT, 526 TokenType.UPDATE, 527 TokenType.USE, 528 TokenType.VOLATILE, 529 TokenType.WINDOW, 530 *CREATABLES, 531 *SUBQUERY_PREDICATES, 532 *TYPE_TOKENS, 533 *NO_PAREN_FUNCTIONS, 534 } 535 ID_VAR_TOKENS.remove(TokenType.UNION) 536 537 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 538 539 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 540 TokenType.ANTI, 541 TokenType.APPLY, 542 TokenType.ASOF, 543 TokenType.FULL, 544 TokenType.LEFT, 545 TokenType.LOCK, 546 TokenType.NATURAL, 547 TokenType.OFFSET, 548 TokenType.RIGHT, 549 TokenType.SEMI, 550 TokenType.WINDOW, 551 } 552 553 ALIAS_TOKENS = 
ID_VAR_TOKENS 554 555 ARRAY_CONSTRUCTORS = { 556 "ARRAY": exp.Array, 557 "LIST": exp.List, 558 } 559 560 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 561 562 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 563 564 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 565 566 FUNC_TOKENS = { 567 TokenType.COLLATE, 568 TokenType.COMMAND, 569 TokenType.CURRENT_DATE, 570 TokenType.CURRENT_DATETIME, 571 TokenType.CURRENT_TIMESTAMP, 572 TokenType.CURRENT_TIME, 573 TokenType.CURRENT_USER, 574 TokenType.FILTER, 575 TokenType.FIRST, 576 TokenType.FORMAT, 577 TokenType.GLOB, 578 TokenType.IDENTIFIER, 579 TokenType.INDEX, 580 TokenType.ISNULL, 581 TokenType.ILIKE, 582 TokenType.INSERT, 583 TokenType.LIKE, 584 TokenType.MERGE, 585 TokenType.OFFSET, 586 TokenType.PRIMARY_KEY, 587 TokenType.RANGE, 588 TokenType.REPLACE, 589 TokenType.RLIKE, 590 TokenType.ROW, 591 TokenType.UNNEST, 592 TokenType.VAR, 593 TokenType.LEFT, 594 TokenType.RIGHT, 595 TokenType.SEQUENCE, 596 TokenType.DATE, 597 TokenType.DATETIME, 598 TokenType.TABLE, 599 TokenType.TIMESTAMP, 600 TokenType.TIMESTAMPTZ, 601 TokenType.TRUNCATE, 602 TokenType.WINDOW, 603 TokenType.XOR, 604 *TYPE_TOKENS, 605 *SUBQUERY_PREDICATES, 606 } 607 608 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 609 TokenType.AND: exp.And, 610 } 611 612 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 613 TokenType.COLON_EQ: exp.PropertyEQ, 614 } 615 616 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 617 TokenType.OR: exp.Or, 618 } 619 620 EQUALITY = { 621 TokenType.EQ: exp.EQ, 622 TokenType.NEQ: exp.NEQ, 623 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 624 } 625 626 COMPARISON = { 627 TokenType.GT: exp.GT, 628 TokenType.GTE: exp.GTE, 629 TokenType.LT: exp.LT, 630 TokenType.LTE: exp.LTE, 631 } 632 633 BITWISE = { 634 TokenType.AMP: exp.BitwiseAnd, 635 TokenType.CARET: exp.BitwiseXor, 636 TokenType.PIPE: exp.BitwiseOr, 637 } 638 639 TERM = { 640 TokenType.DASH: exp.Sub, 641 TokenType.PLUS: exp.Add, 642 TokenType.MOD: exp.Mod, 643 TokenType.COLLATE: exp.Collate, 644 } 645 646 FACTOR = { 647 TokenType.DIV: exp.IntDiv, 648 TokenType.LR_ARROW: exp.Distance, 649 TokenType.SLASH: exp.Div, 650 TokenType.STAR: exp.Mul, 651 } 652 653 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 654 655 TIMES = { 656 TokenType.TIME, 657 TokenType.TIMETZ, 658 } 659 660 TIMESTAMPS = { 661 TokenType.TIMESTAMP, 662 TokenType.TIMESTAMPTZ, 663 TokenType.TIMESTAMPLTZ, 664 *TIMES, 665 } 666 667 SET_OPERATIONS = { 668 TokenType.UNION, 669 TokenType.INTERSECT, 670 TokenType.EXCEPT, 671 } 672 673 JOIN_METHODS = { 674 TokenType.ASOF, 675 TokenType.NATURAL, 676 TokenType.POSITIONAL, 677 } 678 679 JOIN_SIDES = { 680 TokenType.LEFT, 681 TokenType.RIGHT, 682 TokenType.FULL, 683 } 684 685 JOIN_KINDS = { 686 TokenType.ANTI, 687 TokenType.CROSS, 688 TokenType.INNER, 689 TokenType.OUTER, 690 TokenType.SEMI, 691 TokenType.STRAIGHT_JOIN, 692 } 693 694 JOIN_HINTS: t.Set[str] = set() 695 696 LAMBDAS = { 697 TokenType.ARROW: lambda self, expressions: self.expression( 698 exp.Lambda, 699 this=self._replace_lambda( 700 self._parse_assignment(), 701 expressions, 702 ), 703 expressions=expressions, 704 ), 705 TokenType.FARROW: lambda self, expressions: self.expression( 706 exp.Kwarg, 707 this=exp.var(expressions[0].name), 708 expression=self._parse_assignment(), 709 ), 710 } 711 712 COLUMN_OPERATORS = { 713 TokenType.DOT: None, 714 TokenType.DCOLON: lambda self, this, to: self.expression( 715 exp.Cast if self.STRICT_CAST else exp.TryCast, 716 this=this, 717 to=to, 
718 ), 719 TokenType.ARROW: lambda self, this, path: self.expression( 720 exp.JSONExtract, 721 this=this, 722 expression=self.dialect.to_json_path(path), 723 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 724 ), 725 TokenType.DARROW: lambda self, this, path: self.expression( 726 exp.JSONExtractScalar, 727 this=this, 728 expression=self.dialect.to_json_path(path), 729 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 730 ), 731 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 732 exp.JSONBExtract, 733 this=this, 734 expression=path, 735 ), 736 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 737 exp.JSONBExtractScalar, 738 this=this, 739 expression=path, 740 ), 741 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 742 exp.JSONBContains, 743 this=this, 744 expression=key, 745 ), 746 } 747 748 EXPRESSION_PARSERS = { 749 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 750 exp.Column: lambda self: self._parse_column(), 751 exp.Condition: lambda self: self._parse_assignment(), 752 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 753 exp.Expression: lambda self: self._parse_expression(), 754 exp.From: lambda self: self._parse_from(joins=True), 755 exp.Group: lambda self: self._parse_group(), 756 exp.Having: lambda self: self._parse_having(), 757 exp.Identifier: lambda self: self._parse_id_var(), 758 exp.Join: lambda self: self._parse_join(), 759 exp.Lambda: lambda self: self._parse_lambda(), 760 exp.Lateral: lambda self: self._parse_lateral(), 761 exp.Limit: lambda self: self._parse_limit(), 762 exp.Offset: lambda self: self._parse_offset(), 763 exp.Order: lambda self: self._parse_order(), 764 exp.Ordered: lambda self: self._parse_ordered(), 765 exp.Properties: lambda self: self._parse_properties(), 766 exp.Qualify: lambda self: self._parse_qualify(), 767 exp.Returning: lambda self: self._parse_returning(), 768 exp.Select: lambda self: self._parse_select(), 769 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 770 exp.Table: lambda self: self._parse_table_parts(), 771 exp.TableAlias: lambda self: self._parse_table_alias(), 772 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 773 exp.Where: lambda self: self._parse_where(), 774 exp.Window: lambda self: self._parse_named_window(), 775 exp.With: lambda self: self._parse_with(), 776 "JOIN_TYPE": lambda self: self._parse_join_parts(), 777 } 778 779 STATEMENT_PARSERS = { 780 TokenType.ALTER: lambda self: self._parse_alter(), 781 TokenType.BEGIN: lambda self: self._parse_transaction(), 782 TokenType.CACHE: lambda self: self._parse_cache(), 783 TokenType.COMMENT: lambda self: self._parse_comment(), 784 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 785 TokenType.COPY: lambda self: self._parse_copy(), 786 TokenType.CREATE: lambda self: self._parse_create(), 787 TokenType.DELETE: lambda self: self._parse_delete(), 788 TokenType.DESC: lambda self: self._parse_describe(), 789 TokenType.DESCRIBE: lambda self: self._parse_describe(), 790 TokenType.DROP: lambda self: self._parse_drop(), 791 TokenType.GRANT: lambda self: self._parse_grant(), 792 TokenType.INSERT: lambda self: self._parse_insert(), 793 TokenType.KILL: lambda self: self._parse_kill(), 794 TokenType.LOAD: lambda self: self._parse_load(), 795 TokenType.MERGE: lambda self: self._parse_merge(), 796 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 797 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, 
this=self._parse_expression()), 798 TokenType.REFRESH: lambda self: self._parse_refresh(), 799 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 800 TokenType.SET: lambda self: self._parse_set(), 801 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 802 TokenType.UNCACHE: lambda self: self._parse_uncache(), 803 TokenType.UPDATE: lambda self: self._parse_update(), 804 TokenType.USE: lambda self: self.expression( 805 exp.Use, 806 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 807 this=self._parse_table(schema=False), 808 ), 809 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 810 } 811 812 UNARY_PARSERS = { 813 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 814 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 815 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 816 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 817 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 818 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 819 } 820 821 STRING_PARSERS = { 822 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 823 exp.RawString, this=token.text 824 ), 825 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 826 exp.National, this=token.text 827 ), 828 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 829 TokenType.STRING: lambda self, token: self.expression( 830 exp.Literal, this=token.text, is_string=True 831 ), 832 TokenType.UNICODE_STRING: lambda self, token: self.expression( 833 exp.UnicodeString, 834 this=token.text, 835 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 836 ), 837 } 838 839 NUMERIC_PARSERS = { 840 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 841 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 842 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 843 TokenType.NUMBER: lambda self, token: self.expression( 844 exp.Literal, this=token.text, is_string=False 845 ), 846 } 847 848 PRIMARY_PARSERS = { 849 **STRING_PARSERS, 850 **NUMERIC_PARSERS, 851 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 852 TokenType.NULL: lambda self, _: self.expression(exp.Null), 853 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 854 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 855 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 856 TokenType.STAR: lambda self, _: self._parse_star_ops(), 857 } 858 859 PLACEHOLDER_PARSERS = { 860 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 861 TokenType.PARAMETER: lambda self: self._parse_parameter(), 862 TokenType.COLON: lambda self: ( 863 self.expression(exp.Placeholder, this=self._prev.text) 864 if self._match_set(self.ID_VAR_TOKENS) 865 else None 866 ), 867 } 868 869 RANGE_PARSERS = { 870 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 871 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 872 TokenType.GLOB: binary_range_parser(exp.Glob), 873 TokenType.ILIKE: binary_range_parser(exp.ILike), 874 TokenType.IN: lambda self, this: self._parse_in(this), 875 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 876 
TokenType.IS: lambda self, this: self._parse_is(this), 877 TokenType.LIKE: binary_range_parser(exp.Like), 878 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 879 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 880 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 881 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 882 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 883 } 884 885 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 886 "ALLOWED_VALUES": lambda self: self.expression( 887 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 888 ), 889 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 890 "AUTO": lambda self: self._parse_auto_property(), 891 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 892 "BACKUP": lambda self: self.expression( 893 exp.BackupProperty, this=self._parse_var(any_token=True) 894 ), 895 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 896 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 897 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 898 "CHECKSUM": lambda self: self._parse_checksum(), 899 "CLUSTER BY": lambda self: self._parse_cluster(), 900 "CLUSTERED": lambda self: self._parse_clustered_by(), 901 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 902 exp.CollateProperty, **kwargs 903 ), 904 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 905 "CONTAINS": lambda self: self._parse_contains_property(), 906 "COPY": lambda self: self._parse_copy_property(), 907 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 908 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 909 "DEFINER": lambda self: self._parse_definer(), 910 "DETERMINISTIC": lambda self: self.expression( 911 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 912 ), 913 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 914 "DUPLICATE": lambda self: self._parse_duplicate(), 915 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 916 "DISTKEY": lambda self: self._parse_distkey(), 917 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 918 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 919 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 920 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 921 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 922 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 923 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 924 "FREESPACE": lambda self: self._parse_freespace(), 925 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 926 "HEAP": lambda self: self.expression(exp.HeapProperty), 927 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 928 "IMMUTABLE": lambda self: self.expression( 929 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 930 ), 931 "INHERITS": lambda self: self.expression( 932 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 933 ), 934 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 935 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 936 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 937 "LAYOUT": 
lambda self: self._parse_dict_property(this="LAYOUT"), 938 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 939 "LIKE": lambda self: self._parse_create_like(), 940 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 941 "LOCK": lambda self: self._parse_locking(), 942 "LOCKING": lambda self: self._parse_locking(), 943 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 944 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 945 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 946 "MODIFIES": lambda self: self._parse_modifies_property(), 947 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 948 "NO": lambda self: self._parse_no_property(), 949 "ON": lambda self: self._parse_on_property(), 950 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 951 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 952 "PARTITION": lambda self: self._parse_partitioned_of(), 953 "PARTITION BY": lambda self: self._parse_partitioned_by(), 954 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 955 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 956 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 957 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 958 "READS": lambda self: self._parse_reads_property(), 959 "REMOTE": lambda self: self._parse_remote_with_connection(), 960 "RETURNS": lambda self: self._parse_returns(), 961 "STRICT": lambda self: self.expression(exp.StrictProperty), 962 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 963 "ROW": lambda self: self._parse_row(), 964 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 965 "SAMPLE": lambda self: self.expression( 966 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 967 ), 968 "SECURE": lambda self: self.expression(exp.SecureProperty), 969 "SECURITY": lambda self: self._parse_security(), 970 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 971 "SETTINGS": lambda self: self._parse_settings_property(), 972 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 973 "SORTKEY": lambda self: self._parse_sortkey(), 974 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 975 "STABLE": lambda self: self.expression( 976 exp.StabilityProperty, this=exp.Literal.string("STABLE") 977 ), 978 "STORED": lambda self: self._parse_stored(), 979 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 980 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 981 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 982 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 983 "TO": lambda self: self._parse_to_table(), 984 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 985 "TRANSFORM": lambda self: self.expression( 986 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 987 ), 988 "TTL": lambda self: self._parse_ttl(), 989 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 990 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 991 "VOLATILE": lambda self: self._parse_volatile_property(), 992 "WITH": lambda self: self._parse_with_property(), 993 } 994 995 CONSTRAINT_PARSERS = { 996 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 997 "AUTO_INCREMENT": lambda self: 
self._parse_auto_increment(), 998 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 999 "CHARACTER SET": lambda self: self.expression( 1000 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1001 ), 1002 "CHECK": lambda self: self.expression( 1003 exp.CheckColumnConstraint, 1004 this=self._parse_wrapped(self._parse_assignment), 1005 enforced=self._match_text_seq("ENFORCED"), 1006 ), 1007 "COLLATE": lambda self: self.expression( 1008 exp.CollateColumnConstraint, 1009 this=self._parse_identifier() or self._parse_column(), 1010 ), 1011 "COMMENT": lambda self: self.expression( 1012 exp.CommentColumnConstraint, this=self._parse_string() 1013 ), 1014 "COMPRESS": lambda self: self._parse_compress(), 1015 "CLUSTERED": lambda self: self.expression( 1016 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1017 ), 1018 "NONCLUSTERED": lambda self: self.expression( 1019 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1020 ), 1021 "DEFAULT": lambda self: self.expression( 1022 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1023 ), 1024 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1025 "EPHEMERAL": lambda self: self.expression( 1026 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1027 ), 1028 "EXCLUDE": lambda self: self.expression( 1029 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1030 ), 1031 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1032 "FORMAT": lambda self: self.expression( 1033 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1034 ), 1035 "GENERATED": lambda self: self._parse_generated_as_identity(), 1036 "IDENTITY": lambda self: self._parse_auto_increment(), 1037 "INLINE": lambda self: self._parse_inline(), 1038 "LIKE": lambda self: self._parse_create_like(), 1039 "NOT": lambda self: self._parse_not_constraint(), 1040 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1041 "ON": lambda self: ( 1042 self._match(TokenType.UPDATE) 1043 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1044 ) 1045 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1046 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1047 "PERIOD": lambda self: self._parse_period_for_system_time(), 1048 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1049 "REFERENCES": lambda self: self._parse_references(match=False), 1050 "TITLE": lambda self: self.expression( 1051 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1052 ), 1053 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1054 "UNIQUE": lambda self: self._parse_unique(), 1055 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1056 "WITH": lambda self: self.expression( 1057 exp.Properties, expressions=self._parse_wrapped_properties() 1058 ), 1059 } 1060 1061 ALTER_PARSERS = { 1062 "ADD": lambda self: self._parse_alter_table_add(), 1063 "AS": lambda self: self._parse_select(), 1064 "ALTER": lambda self: self._parse_alter_table_alter(), 1065 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1066 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1067 "DROP": lambda self: self._parse_alter_table_drop(), 1068 "RENAME": lambda self: self._parse_alter_table_rename(), 1069 "SET": lambda self: self._parse_alter_table_set(), 
1070 "SWAP": lambda self: self.expression( 1071 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1072 ), 1073 } 1074 1075 ALTER_ALTER_PARSERS = { 1076 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1077 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1078 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1079 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1080 } 1081 1082 SCHEMA_UNNAMED_CONSTRAINTS = { 1083 "CHECK", 1084 "EXCLUDE", 1085 "FOREIGN KEY", 1086 "LIKE", 1087 "PERIOD", 1088 "PRIMARY KEY", 1089 "UNIQUE", 1090 } 1091 1092 NO_PAREN_FUNCTION_PARSERS = { 1093 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1094 "CASE": lambda self: self._parse_case(), 1095 "CONNECT_BY_ROOT": lambda self: self.expression( 1096 exp.ConnectByRoot, this=self._parse_column() 1097 ), 1098 "IF": lambda self: self._parse_if(), 1099 "NEXT": lambda self: self._parse_next_value_for(), 1100 } 1101 1102 INVALID_FUNC_NAME_TOKENS = { 1103 TokenType.IDENTIFIER, 1104 TokenType.STRING, 1105 } 1106 1107 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1108 1109 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1110 1111 FUNCTION_PARSERS = { 1112 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1113 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1114 "DECODE": lambda self: self._parse_decode(), 1115 "EXTRACT": lambda self: self._parse_extract(), 1116 "GAP_FILL": lambda self: self._parse_gap_fill(), 1117 "JSON_OBJECT": lambda self: self._parse_json_object(), 1118 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1119 "JSON_TABLE": lambda self: self._parse_json_table(), 1120 "MATCH": lambda self: self._parse_match_against(), 1121 "NORMALIZE": lambda self: self._parse_normalize(), 1122 "OPENJSON": lambda self: self._parse_open_json(), 1123 "OVERLAY": lambda self: self._parse_overlay(), 1124 "POSITION": lambda self: self._parse_position(), 1125 "PREDICT": lambda self: self._parse_predict(), 1126 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1127 "STRING_AGG": lambda self: self._parse_string_agg(), 1128 "SUBSTRING": lambda self: self._parse_substring(), 1129 "TRIM": lambda self: self._parse_trim(), 1130 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1131 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1132 } 1133 1134 QUERY_MODIFIER_PARSERS = { 1135 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1136 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1137 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1138 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1139 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1140 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1141 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1142 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1143 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1144 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1145 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1146 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1147 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1148 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1149 TokenType.USING: lambda self: ("sample", 
self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA,
TokenType.R_PAREN} 1259 1260 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1261 1262 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1263 1264 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1265 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1266 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1267 1268 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1269 1270 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1271 1272 ADD_CONSTRAINT_TOKENS = { 1273 TokenType.CONSTRAINT, 1274 TokenType.FOREIGN_KEY, 1275 TokenType.INDEX, 1276 TokenType.KEY, 1277 TokenType.PRIMARY_KEY, 1278 TokenType.UNIQUE, 1279 } 1280 1281 DISTINCT_TOKENS = {TokenType.DISTINCT} 1282 1283 NULL_TOKENS = {TokenType.NULL} 1284 1285 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1286 1287 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1288 1289 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1290 1291 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1292 1293 ODBC_DATETIME_LITERALS = { 1294 "d": exp.Date, 1295 "t": exp.Time, 1296 "ts": exp.Timestamp, 1297 } 1298 1299 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1300 1301 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1302 1303 # The style options for the DESCRIBE statement 1304 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1305 1306 OPERATION_MODIFIERS: t.Set[str] = set() 1307 1308 STRICT_CAST = True 1309 1310 PREFIXED_PIVOT_COLUMNS = False 1311 IDENTIFY_PIVOT_STRINGS = False 1312 1313 LOG_DEFAULTS_TO_LN = False 1314 1315 # Whether ADD is present for each column added by ALTER TABLE 1316 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1317 1318 # Whether the table sample clause expects CSV syntax 1319 TABLESAMPLE_CSV = False 1320 1321 # The default method used for table sampling 1322 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1323 1324 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1325 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1326 1327 # Whether the TRIM function expects the characters to trim as its first argument 1328 TRIM_PATTERN_FIRST = False 1329 1330 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1331 STRING_ALIASES = False 1332 1333 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1334 MODIFIERS_ATTACHED_TO_SET_OP = True 1335 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1336 1337 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1338 NO_PAREN_IF_COMMANDS = True 1339 1340 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1341 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1342 1343 # Whether the `:` operator is used to extract a value from a VARIANT column 1344 COLON_IS_VARIANT_EXTRACT = False 1345 1346 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1347 # If this is True and '(' is not found, the keyword will be treated as an identifier 1348 VALUES_FOLLOWED_BY_PAREN = True 1349 1350 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1351 SUPPORTS_IMPLICIT_UNNEST = False 1352 1353 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1354 INTERVAL_SPANS = True 1355 1356 # Whether a PARTITION clause can follow a table reference 1357 SUPPORTS_PARTITION_SELECTION = False 1358 1359 __slots__ = ( 1360 "error_level", 1361 "error_message_context", 1362 "max_errors", 1363 "dialect", 1364 "sql", 1365 "errors", 1366 "_tokens", 1367 "_index", 1368 "_curr", 1369 "_next", 1370 "_prev", 1371 "_prev_comments", 1372 ) 1373 1374 # Autofilled 1375 SHOW_TRIE: t.Dict = {} 1376 SET_TRIE: t.Dict = {} 1377 1378 def __init__( 1379 self, 1380 error_level: t.Optional[ErrorLevel] = None, 1381 error_message_context: int = 100, 1382 max_errors: int = 3, 1383 dialect: DialectType = None, 1384 ): 1385 from sqlglot.dialects import Dialect 1386 1387 self.error_level = error_level or ErrorLevel.IMMEDIATE 1388 self.error_message_context = error_message_context 1389 self.max_errors = max_errors 1390 self.dialect = Dialect.get_or_raise(dialect) 1391 self.reset() 1392 1393 def reset(self): 1394 self.sql = "" 1395 self.errors = [] 1396 self._tokens = [] 1397 self._index = 0 1398 self._curr = None 1399 self._next = None 1400 self._prev = None 1401 self._prev_comments = None 1402 1403 def parse( 1404 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1405 ) -> t.List[t.Optional[exp.Expression]]: 1406 """ 1407 Parses a list of tokens and returns a list of syntax trees, one tree 1408 per parsed SQL statement. 1409 1410 Args: 1411 raw_tokens: The list of tokens. 1412 sql: The original SQL string, used to produce helpful debug messages. 1413 1414 Returns: 1415 The list of the produced syntax trees. 1416 """ 1417 return self._parse( 1418 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1419 ) 1420 1421 def parse_into( 1422 self, 1423 expression_types: exp.IntoType, 1424 raw_tokens: t.List[Token], 1425 sql: t.Optional[str] = None, 1426 ) -> t.List[t.Optional[exp.Expression]]: 1427 """ 1428 Parses a list of tokens into a given Expression type. If a collection of Expression 1429 types is given instead, this method will try to parse the token list into each one 1430 of them, stopping at the first for which the parsing succeeds. 1431 1432 Args: 1433 expression_types: The expression type(s) to try and parse the token list into. 1434 raw_tokens: The list of tokens. 1435 sql: The original SQL string, used to produce helpful debug messages. 1436 1437 Returns: 1438 The target Expression. 
1439 """ 1440 errors = [] 1441 for expression_type in ensure_list(expression_types): 1442 parser = self.EXPRESSION_PARSERS.get(expression_type) 1443 if not parser: 1444 raise TypeError(f"No parser registered for {expression_type}") 1445 1446 try: 1447 return self._parse(parser, raw_tokens, sql) 1448 except ParseError as e: 1449 e.errors[0]["into_expression"] = expression_type 1450 errors.append(e) 1451 1452 raise ParseError( 1453 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1454 errors=merge_errors(errors), 1455 ) from errors[-1] 1456 1457 def _parse( 1458 self, 1459 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1460 raw_tokens: t.List[Token], 1461 sql: t.Optional[str] = None, 1462 ) -> t.List[t.Optional[exp.Expression]]: 1463 self.reset() 1464 self.sql = sql or "" 1465 1466 total = len(raw_tokens) 1467 chunks: t.List[t.List[Token]] = [[]] 1468 1469 for i, token in enumerate(raw_tokens): 1470 if token.token_type == TokenType.SEMICOLON: 1471 if token.comments: 1472 chunks.append([token]) 1473 1474 if i < total - 1: 1475 chunks.append([]) 1476 else: 1477 chunks[-1].append(token) 1478 1479 expressions = [] 1480 1481 for tokens in chunks: 1482 self._index = -1 1483 self._tokens = tokens 1484 self._advance() 1485 1486 expressions.append(parse_method(self)) 1487 1488 if self._index < len(self._tokens): 1489 self.raise_error("Invalid expression / Unexpected token") 1490 1491 self.check_errors() 1492 1493 return expressions 1494 1495 def check_errors(self) -> None: 1496 """Logs or raises any found errors, depending on the chosen error level setting.""" 1497 if self.error_level == ErrorLevel.WARN: 1498 for error in self.errors: 1499 logger.error(str(error)) 1500 elif self.error_level == ErrorLevel.RAISE and self.errors: 1501 raise ParseError( 1502 concat_messages(self.errors, self.max_errors), 1503 errors=merge_errors(self.errors), 1504 ) 1505 1506 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1507 """ 1508 Appends an error in the list of recorded errors or raises it, depending on the chosen 1509 error level setting. 1510 """ 1511 token = token or self._curr or self._prev or Token.string("") 1512 start = token.start 1513 end = token.end + 1 1514 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1515 highlight = self.sql[start:end] 1516 end_context = self.sql[end : end + self.error_message_context] 1517 1518 error = ParseError.new( 1519 f"{message}. Line {token.line}, Col: {token.col}.\n" 1520 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1521 description=message, 1522 line=token.line, 1523 col=token.col, 1524 start_context=start_context, 1525 highlight=highlight, 1526 end_context=end_context, 1527 ) 1528 1529 if self.error_level == ErrorLevel.IMMEDIATE: 1530 raise error 1531 1532 self.errors.append(error) 1533 1534 def expression( 1535 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1536 ) -> E: 1537 """ 1538 Creates a new, validated Expression. 1539 1540 Args: 1541 exp_class: The expression class to instantiate. 1542 comments: An optional list of comments to attach to the expression. 1543 kwargs: The arguments to set for the expression along with their respective values. 1544 1545 Returns: 1546 The target expression. 
1547 """ 1548 instance = exp_class(**kwargs) 1549 instance.add_comments(comments) if comments else self._add_comments(instance) 1550 return self.validate_expression(instance) 1551 1552 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1553 if expression and self._prev_comments: 1554 expression.add_comments(self._prev_comments) 1555 self._prev_comments = None 1556 1557 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1558 """ 1559 Validates an Expression, making sure that all its mandatory arguments are set. 1560 1561 Args: 1562 expression: The expression to validate. 1563 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1564 1565 Returns: 1566 The validated expression. 1567 """ 1568 if self.error_level != ErrorLevel.IGNORE: 1569 for error_message in expression.error_messages(args): 1570 self.raise_error(error_message) 1571 1572 return expression 1573 1574 def _find_sql(self, start: Token, end: Token) -> str: 1575 return self.sql[start.start : end.end + 1] 1576 1577 def _is_connected(self) -> bool: 1578 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1579 1580 def _advance(self, times: int = 1) -> None: 1581 self._index += times 1582 self._curr = seq_get(self._tokens, self._index) 1583 self._next = seq_get(self._tokens, self._index + 1) 1584 1585 if self._index > 0: 1586 self._prev = self._tokens[self._index - 1] 1587 self._prev_comments = self._prev.comments 1588 else: 1589 self._prev = None 1590 self._prev_comments = None 1591 1592 def _retreat(self, index: int) -> None: 1593 if index != self._index: 1594 self._advance(index - self._index) 1595 1596 def _warn_unsupported(self) -> None: 1597 if len(self._tokens) <= 1: 1598 return 1599 1600 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1601 # interested in emitting a warning for the one being currently processed. 1602 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1603 1604 logger.warning( 1605 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1606 ) 1607 1608 def _parse_command(self) -> exp.Command: 1609 self._warn_unsupported() 1610 return self.expression( 1611 exp.Command, 1612 comments=self._prev_comments, 1613 this=self._prev.text.upper(), 1614 expression=self._parse_string(), 1615 ) 1616 1617 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1618 """ 1619 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1620 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1621 solve this by setting & resetting the parser state accordingly 1622 """ 1623 index = self._index 1624 error_level = self.error_level 1625 1626 self.error_level = ErrorLevel.IMMEDIATE 1627 try: 1628 this = parse_method() 1629 except ParseError: 1630 this = None 1631 finally: 1632 if not this or retreat: 1633 self._retreat(index) 1634 self.error_level = error_level 1635 1636 return this 1637 1638 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1639 start = self._prev 1640 exists = self._parse_exists() if allow_exists else None 1641 1642 self._match(TokenType.ON) 1643 1644 materialized = self._match_text_seq("MATERIALIZED") 1645 kind = self._match_set(self.CREATABLES) and self._prev 1646 if not kind: 1647 return self._parse_as_command(start) 1648 1649 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1650 this = self._parse_user_defined_function(kind=kind.token_type) 1651 elif kind.token_type == TokenType.TABLE: 1652 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1653 elif kind.token_type == TokenType.COLUMN: 1654 this = self._parse_column() 1655 else: 1656 this = self._parse_id_var() 1657 1658 self._match(TokenType.IS) 1659 1660 return self.expression( 1661 exp.Comment, 1662 this=this, 1663 kind=kind.text, 1664 expression=self._parse_string(), 1665 exists=exists, 1666 materialized=materialized, 1667 ) 1668 1669 def _parse_to_table( 1670 self, 1671 ) -> exp.ToTableProperty: 1672 table = self._parse_table_parts(schema=True) 1673 return self.expression(exp.ToTableProperty, this=table) 1674 1675 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1676 def _parse_ttl(self) -> exp.Expression: 1677 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1678 this = self._parse_bitwise() 1679 1680 if self._match_text_seq("DELETE"): 1681 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1682 if self._match_text_seq("RECOMPRESS"): 1683 return self.expression( 1684 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1685 ) 1686 if self._match_text_seq("TO", "DISK"): 1687 return self.expression( 1688 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1689 ) 1690 if self._match_text_seq("TO", "VOLUME"): 1691 return self.expression( 1692 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1693 ) 1694 1695 return this 1696 1697 expressions = self._parse_csv(_parse_ttl_action) 1698 where = self._parse_where() 1699 group = self._parse_group() 1700 1701 aggregates = None 1702 if group and self._match(TokenType.SET): 1703 aggregates = self._parse_csv(self._parse_set_item) 1704 1705 return self.expression( 1706 exp.MergeTreeTTL, 1707 expressions=expressions, 1708 where=where, 1709 group=group, 1710 aggregates=aggregates, 1711 ) 1712 1713 def _parse_statement(self) -> t.Optional[exp.Expression]: 1714 if self._curr is None: 1715 return None 1716 1717 if self._match_set(self.STATEMENT_PARSERS): 1718 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1719 1720 if self._match_set(self.dialect.tokenizer.COMMANDS): 1721 return self._parse_command() 1722 1723 expression = self._parse_expression() 1724 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1725 return self._parse_query_modifiers(expression) 1726 1727 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1728 start = 
self._prev 1729 temporary = self._match(TokenType.TEMPORARY) 1730 materialized = self._match_text_seq("MATERIALIZED") 1731 1732 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1733 if not kind: 1734 return self._parse_as_command(start) 1735 1736 concurrently = self._match_text_seq("CONCURRENTLY") 1737 if_exists = exists or self._parse_exists() 1738 table = self._parse_table_parts( 1739 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1740 ) 1741 1742 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1743 1744 if self._match(TokenType.L_PAREN, advance=False): 1745 expressions = self._parse_wrapped_csv(self._parse_types) 1746 else: 1747 expressions = None 1748 1749 return self.expression( 1750 exp.Drop, 1751 comments=start.comments, 1752 exists=if_exists, 1753 this=table, 1754 expressions=expressions, 1755 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1756 temporary=temporary, 1757 materialized=materialized, 1758 cascade=self._match_text_seq("CASCADE"), 1759 constraints=self._match_text_seq("CONSTRAINTS"), 1760 purge=self._match_text_seq("PURGE"), 1761 cluster=cluster, 1762 concurrently=concurrently, 1763 ) 1764 1765 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1766 return ( 1767 self._match_text_seq("IF") 1768 and (not not_ or self._match(TokenType.NOT)) 1769 and self._match(TokenType.EXISTS) 1770 ) 1771 1772 def _parse_create(self) -> exp.Create | exp.Command: 1773 # Note: this can't be None because we've matched a statement parser 1774 start = self._prev 1775 comments = self._prev_comments 1776 1777 replace = ( 1778 start.token_type == TokenType.REPLACE 1779 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1780 or self._match_pair(TokenType.OR, TokenType.ALTER) 1781 ) 1782 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1783 1784 unique = self._match(TokenType.UNIQUE) 1785 1786 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1787 clustered = True 1788 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1789 "COLUMNSTORE" 1790 ): 1791 clustered = False 1792 else: 1793 clustered = None 1794 1795 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1796 self._advance() 1797 1798 properties = None 1799 create_token = self._match_set(self.CREATABLES) and self._prev 1800 1801 if not create_token: 1802 # exp.Properties.Location.POST_CREATE 1803 properties = self._parse_properties() 1804 create_token = self._match_set(self.CREATABLES) and self._prev 1805 1806 if not properties or not create_token: 1807 return self._parse_as_command(start) 1808 1809 concurrently = self._match_text_seq("CONCURRENTLY") 1810 exists = self._parse_exists(not_=True) 1811 this = None 1812 expression: t.Optional[exp.Expression] = None 1813 indexes = None 1814 no_schema_binding = None 1815 begin = None 1816 end = None 1817 clone = None 1818 1819 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1820 nonlocal properties 1821 if properties and temp_props: 1822 properties.expressions.extend(temp_props.expressions) 1823 elif temp_props: 1824 properties = temp_props 1825 1826 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1827 this = self._parse_user_defined_function(kind=create_token.token_type) 1828 1829 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1830 extend_props(self._parse_properties()) 1831 1832 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1833 
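            # Illustrative note, not from the source: end to end, _parse_create turns
            # DDL into an exp.Create node, e.g.:
            #
            #     import sqlglot
            #
            #     create = sqlglot.parse_one("CREATE TABLE t (a INT)")
            #     assert isinstance(create, sqlglot.exp.Create)
            #     create.args["kind"]  # 'TABLE'
            #     create.this          # exp.Schema holding the table name and columns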
extend_props(self._parse_properties()) 1834 1835 if not expression: 1836 if self._match(TokenType.COMMAND): 1837 expression = self._parse_as_command(self._prev) 1838 else: 1839 begin = self._match(TokenType.BEGIN) 1840 return_ = self._match_text_seq("RETURN") 1841 1842 if self._match(TokenType.STRING, advance=False): 1843 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1844 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1845 expression = self._parse_string() 1846 extend_props(self._parse_properties()) 1847 else: 1848 expression = self._parse_statement() 1849 1850 end = self._match_text_seq("END") 1851 1852 if return_: 1853 expression = self.expression(exp.Return, this=expression) 1854 elif create_token.token_type == TokenType.INDEX: 1855 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1856 if not self._match(TokenType.ON): 1857 index = self._parse_id_var() 1858 anonymous = False 1859 else: 1860 index = None 1861 anonymous = True 1862 1863 this = self._parse_index(index=index, anonymous=anonymous) 1864 elif create_token.token_type in self.DB_CREATABLES: 1865 table_parts = self._parse_table_parts( 1866 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1867 ) 1868 1869 # exp.Properties.Location.POST_NAME 1870 self._match(TokenType.COMMA) 1871 extend_props(self._parse_properties(before=True)) 1872 1873 this = self._parse_schema(this=table_parts) 1874 1875 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1876 extend_props(self._parse_properties()) 1877 1878 self._match(TokenType.ALIAS) 1879 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1880 # exp.Properties.Location.POST_ALIAS 1881 extend_props(self._parse_properties()) 1882 1883 if create_token.token_type == TokenType.SEQUENCE: 1884 expression = self._parse_types() 1885 extend_props(self._parse_properties()) 1886 else: 1887 expression = self._parse_ddl_select() 1888 1889 if create_token.token_type == TokenType.TABLE: 1890 # exp.Properties.Location.POST_EXPRESSION 1891 extend_props(self._parse_properties()) 1892 1893 indexes = [] 1894 while True: 1895 index = self._parse_index() 1896 1897 # exp.Properties.Location.POST_INDEX 1898 extend_props(self._parse_properties()) 1899 if not index: 1900 break 1901 else: 1902 self._match(TokenType.COMMA) 1903 indexes.append(index) 1904 elif create_token.token_type == TokenType.VIEW: 1905 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1906 no_schema_binding = True 1907 1908 shallow = self._match_text_seq("SHALLOW") 1909 1910 if self._match_texts(self.CLONE_KEYWORDS): 1911 copy = self._prev.text.lower() == "copy" 1912 clone = self.expression( 1913 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1914 ) 1915 1916 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1917 return self._parse_as_command(start) 1918 1919 create_kind_text = create_token.text.upper() 1920 return self.expression( 1921 exp.Create, 1922 comments=comments, 1923 this=this, 1924 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1925 replace=replace, 1926 refresh=refresh, 1927 unique=unique, 1928 expression=expression, 1929 exists=exists, 1930 properties=properties, 1931 indexes=indexes, 1932 no_schema_binding=no_schema_binding, 1933 begin=begin, 1934 end=end, 1935 clone=clone, 1936 concurrently=concurrently, 1937 clustered=clustered, 1938 ) 1939 1940 def 
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1941 seq = exp.SequenceProperties() 1942 1943 options = [] 1944 index = self._index 1945 1946 while self._curr: 1947 self._match(TokenType.COMMA) 1948 if self._match_text_seq("INCREMENT"): 1949 self._match_text_seq("BY") 1950 self._match_text_seq("=") 1951 seq.set("increment", self._parse_term()) 1952 elif self._match_text_seq("MINVALUE"): 1953 seq.set("minvalue", self._parse_term()) 1954 elif self._match_text_seq("MAXVALUE"): 1955 seq.set("maxvalue", self._parse_term()) 1956 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1957 self._match_text_seq("=") 1958 seq.set("start", self._parse_term()) 1959 elif self._match_text_seq("CACHE"): 1960 # T-SQL allows empty CACHE which is initialized dynamically 1961 seq.set("cache", self._parse_number() or True) 1962 elif self._match_text_seq("OWNED", "BY"): 1963 # "OWNED BY NONE" is the default 1964 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1965 else: 1966 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1967 if opt: 1968 options.append(opt) 1969 else: 1970 break 1971 1972 seq.set("options", options if options else None) 1973 return None if self._index == index else seq 1974 1975 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1976 # only used for teradata currently 1977 self._match(TokenType.COMMA) 1978 1979 kwargs = { 1980 "no": self._match_text_seq("NO"), 1981 "dual": self._match_text_seq("DUAL"), 1982 "before": self._match_text_seq("BEFORE"), 1983 "default": self._match_text_seq("DEFAULT"), 1984 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1985 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1986 "after": self._match_text_seq("AFTER"), 1987 "minimum": self._match_texts(("MIN", "MINIMUM")), 1988 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1989 } 1990 1991 if self._match_texts(self.PROPERTY_PARSERS): 1992 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1993 try: 1994 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1995 except TypeError: 1996 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1997 1998 return None 1999 2000 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2001 return self._parse_wrapped_csv(self._parse_property) 2002 2003 def _parse_property(self) -> t.Optional[exp.Expression]: 2004 if self._match_texts(self.PROPERTY_PARSERS): 2005 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2006 2007 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2008 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2009 2010 if self._match_text_seq("COMPOUND", "SORTKEY"): 2011 return self._parse_sortkey(compound=True) 2012 2013 if self._match_text_seq("SQL", "SECURITY"): 2014 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2015 2016 index = self._index 2017 key = self._parse_column() 2018 2019 if not self._match(TokenType.EQ): 2020 self._retreat(index) 2021 return self._parse_sequence_properties() 2022 2023 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2024 if isinstance(key, exp.Column): 2025 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2026 2027 value = self._parse_bitwise() or self._parse_var(any_token=True) 2028 2029 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2030 if 
isinstance(value, exp.Column): 2031 value = exp.var(value.name) 2032 2033 return self.expression(exp.Property, this=key, value=value) 2034 2035 def _parse_stored(self) -> exp.FileFormatProperty: 2036 self._match(TokenType.ALIAS) 2037 2038 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2039 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2040 2041 return self.expression( 2042 exp.FileFormatProperty, 2043 this=( 2044 self.expression( 2045 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2046 ) 2047 if input_format or output_format 2048 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2049 ), 2050 ) 2051 2052 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2053 field = self._parse_field() 2054 if isinstance(field, exp.Identifier) and not field.quoted: 2055 field = exp.var(field) 2056 2057 return field 2058 2059 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2060 self._match(TokenType.EQ) 2061 self._match(TokenType.ALIAS) 2062 2063 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2064 2065 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2066 properties = [] 2067 while True: 2068 if before: 2069 prop = self._parse_property_before() 2070 else: 2071 prop = self._parse_property() 2072 if not prop: 2073 break 2074 for p in ensure_list(prop): 2075 properties.append(p) 2076 2077 if properties: 2078 return self.expression(exp.Properties, expressions=properties) 2079 2080 return None 2081 2082 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2083 return self.expression( 2084 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2085 ) 2086 2087 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2088 if self._match_texts(("DEFINER", "INVOKER")): 2089 security_specifier = self._prev.text.upper() 2090 return self.expression(exp.SecurityProperty, this=security_specifier) 2091 return None 2092 2093 def _parse_settings_property(self) -> exp.SettingsProperty: 2094 return self.expression( 2095 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2096 ) 2097 2098 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2099 if self._index >= 2: 2100 pre_volatile_token = self._tokens[self._index - 2] 2101 else: 2102 pre_volatile_token = None 2103 2104 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2105 return exp.VolatileProperty() 2106 2107 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2108 2109 def _parse_retention_period(self) -> exp.Var: 2110 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2111 number = self._parse_number() 2112 number_str = f"{number} " if number else "" 2113 unit = self._parse_var(any_token=True) 2114 return exp.var(f"{number_str}{unit}") 2115 2116 def _parse_system_versioning_property( 2117 self, with_: bool = False 2118 ) -> exp.WithSystemVersioningProperty: 2119 self._match(TokenType.EQ) 2120 prop = self.expression( 2121 exp.WithSystemVersioningProperty, 2122 **{ # type: ignore 2123 "on": True, 2124 "with": with_, 2125 }, 2126 ) 2127 2128 if self._match_text_seq("OFF"): 2129 prop.set("on", False) 2130 return prop 2131 2132 self._match(TokenType.ON) 2133 if self._match(TokenType.L_PAREN): 2134 while self._curr and not 
self._match(TokenType.R_PAREN): 2135 if self._match_text_seq("HISTORY_TABLE", "="): 2136 prop.set("this", self._parse_table_parts()) 2137 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2138 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2139 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2140 prop.set("retention_period", self._parse_retention_period()) 2141 2142 self._match(TokenType.COMMA) 2143 2144 return prop 2145 2146 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2147 self._match(TokenType.EQ) 2148 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2149 prop = self.expression(exp.DataDeletionProperty, on=on) 2150 2151 if self._match(TokenType.L_PAREN): 2152 while self._curr and not self._match(TokenType.R_PAREN): 2153 if self._match_text_seq("FILTER_COLUMN", "="): 2154 prop.set("filter_column", self._parse_column()) 2155 elif self._match_text_seq("RETENTION_PERIOD", "="): 2156 prop.set("retention_period", self._parse_retention_period()) 2157 2158 self._match(TokenType.COMMA) 2159 2160 return prop 2161 2162 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2163 kind = "HASH" 2164 expressions: t.Optional[t.List[exp.Expression]] = None 2165 if self._match_text_seq("BY", "HASH"): 2166 expressions = self._parse_wrapped_csv(self._parse_id_var) 2167 elif self._match_text_seq("BY", "RANDOM"): 2168 kind = "RANDOM" 2169 2170 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2171 buckets: t.Optional[exp.Expression] = None 2172 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2173 buckets = self._parse_number() 2174 2175 return self.expression( 2176 exp.DistributedByProperty, 2177 expressions=expressions, 2178 kind=kind, 2179 buckets=buckets, 2180 order=self._parse_order(), 2181 ) 2182 2183 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2184 self._match_text_seq("KEY") 2185 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2186 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2187 2188 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2189 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2190 prop = self._parse_system_versioning_property(with_=True) 2191 self._match_r_paren() 2192 return prop 2193 2194 if self._match(TokenType.L_PAREN, advance=False): 2195 return self._parse_wrapped_properties() 2196 2197 if self._match_text_seq("JOURNAL"): 2198 return self._parse_withjournaltable() 2199 2200 if self._match_texts(self.VIEW_ATTRIBUTES): 2201 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2202 2203 if self._match_text_seq("DATA"): 2204 return self._parse_withdata(no=False) 2205 elif self._match_text_seq("NO", "DATA"): 2206 return self._parse_withdata(no=True) 2207 2208 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2209 return self._parse_serde_properties(with_=True) 2210 2211 if self._match(TokenType.SCHEMA): 2212 return self.expression( 2213 exp.WithSchemaBindingProperty, 2214 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2215 ) 2216 2217 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2218 return self.expression( 2219 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2220 ) 2221 2222 if not self._next: 2223 return None 2224 2225 return self._parse_withisolatedloading() 2226 2227 def _parse_procedure_option(self) -> exp.Expression | None: 
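        # Illustrative note, not from the source: this consumes T-SQL-style procedure
        # options. Whether a full statement such as
        #
        #     CREATE PROCEDURE p WITH EXECUTE AS CALLER AS SELECT 1
        #
        # parses into exp.ExecuteAsProperty (rather than falling back to a command)
        # depends on the dialect's PROCEDURE_OPTIONS table; the base class leaves
        # that table empty.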
2228 if self._match_text_seq("EXECUTE", "AS"): 2229 return self.expression( 2230 exp.ExecuteAsProperty, 2231 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2232 or self._parse_string(), 2233 ) 2234 2235 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2236 2237 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2238 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2239 self._match(TokenType.EQ) 2240 2241 user = self._parse_id_var() 2242 self._match(TokenType.PARAMETER) 2243 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2244 2245 if not user or not host: 2246 return None 2247 2248 return exp.DefinerProperty(this=f"{user}@{host}") 2249 2250 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2251 self._match(TokenType.TABLE) 2252 self._match(TokenType.EQ) 2253 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2254 2255 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2256 return self.expression(exp.LogProperty, no=no) 2257 2258 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2259 return self.expression(exp.JournalProperty, **kwargs) 2260 2261 def _parse_checksum(self) -> exp.ChecksumProperty: 2262 self._match(TokenType.EQ) 2263 2264 on = None 2265 if self._match(TokenType.ON): 2266 on = True 2267 elif self._match_text_seq("OFF"): 2268 on = False 2269 2270 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2271 2272 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2273 return self.expression( 2274 exp.Cluster, 2275 expressions=( 2276 self._parse_wrapped_csv(self._parse_ordered) 2277 if wrapped 2278 else self._parse_csv(self._parse_ordered) 2279 ), 2280 ) 2281 2282 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2283 self._match_text_seq("BY") 2284 2285 self._match_l_paren() 2286 expressions = self._parse_csv(self._parse_column) 2287 self._match_r_paren() 2288 2289 if self._match_text_seq("SORTED", "BY"): 2290 self._match_l_paren() 2291 sorted_by = self._parse_csv(self._parse_ordered) 2292 self._match_r_paren() 2293 else: 2294 sorted_by = None 2295 2296 self._match(TokenType.INTO) 2297 buckets = self._parse_number() 2298 self._match_text_seq("BUCKETS") 2299 2300 return self.expression( 2301 exp.ClusteredByProperty, 2302 expressions=expressions, 2303 sorted_by=sorted_by, 2304 buckets=buckets, 2305 ) 2306 2307 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2308 if not self._match_text_seq("GRANTS"): 2309 self._retreat(self._index - 1) 2310 return None 2311 2312 return self.expression(exp.CopyGrantsProperty) 2313 2314 def _parse_freespace(self) -> exp.FreespaceProperty: 2315 self._match(TokenType.EQ) 2316 return self.expression( 2317 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2318 ) 2319 2320 def _parse_mergeblockratio( 2321 self, no: bool = False, default: bool = False 2322 ) -> exp.MergeBlockRatioProperty: 2323 if self._match(TokenType.EQ): 2324 return self.expression( 2325 exp.MergeBlockRatioProperty, 2326 this=self._parse_number(), 2327 percent=self._match(TokenType.PERCENT), 2328 ) 2329 2330 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2331 2332 def _parse_datablocksize( 2333 self, 2334 default: t.Optional[bool] = None, 2335 minimum: t.Optional[bool] = None, 2336 maximum: t.Optional[bool] = None, 2337 ) -> exp.DataBlocksizeProperty: 2338 
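        # Illustrative note, not from the source: DATABLOCKSIZE is a Teradata table
        # option. The default/minimum/maximum flags arrive as keyword arguments from
        # _parse_property_before (see above), so input along the lines of
        #
        #     MINIMUM DATABLOCKSIZE = 1024 BYTES
        #
        # is expected to produce roughly
        # exp.DataBlocksizeProperty(size=<1024>, units='BYTES', minimum=True).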
self._match(TokenType.EQ) 2339 size = self._parse_number() 2340 2341 units = None 2342 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2343 units = self._prev.text 2344 2345 return self.expression( 2346 exp.DataBlocksizeProperty, 2347 size=size, 2348 units=units, 2349 default=default, 2350 minimum=minimum, 2351 maximum=maximum, 2352 ) 2353 2354 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2355 self._match(TokenType.EQ) 2356 always = self._match_text_seq("ALWAYS") 2357 manual = self._match_text_seq("MANUAL") 2358 never = self._match_text_seq("NEVER") 2359 default = self._match_text_seq("DEFAULT") 2360 2361 autotemp = None 2362 if self._match_text_seq("AUTOTEMP"): 2363 autotemp = self._parse_schema() 2364 2365 return self.expression( 2366 exp.BlockCompressionProperty, 2367 always=always, 2368 manual=manual, 2369 never=never, 2370 default=default, 2371 autotemp=autotemp, 2372 ) 2373 2374 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2375 index = self._index 2376 no = self._match_text_seq("NO") 2377 concurrent = self._match_text_seq("CONCURRENT") 2378 2379 if not self._match_text_seq("ISOLATED", "LOADING"): 2380 self._retreat(index) 2381 return None 2382 2383 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2384 return self.expression( 2385 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2386 ) 2387 2388 def _parse_locking(self) -> exp.LockingProperty: 2389 if self._match(TokenType.TABLE): 2390 kind = "TABLE" 2391 elif self._match(TokenType.VIEW): 2392 kind = "VIEW" 2393 elif self._match(TokenType.ROW): 2394 kind = "ROW" 2395 elif self._match_text_seq("DATABASE"): 2396 kind = "DATABASE" 2397 else: 2398 kind = None 2399 2400 if kind in ("DATABASE", "TABLE", "VIEW"): 2401 this = self._parse_table_parts() 2402 else: 2403 this = None 2404 2405 if self._match(TokenType.FOR): 2406 for_or_in = "FOR" 2407 elif self._match(TokenType.IN): 2408 for_or_in = "IN" 2409 else: 2410 for_or_in = None 2411 2412 if self._match_text_seq("ACCESS"): 2413 lock_type = "ACCESS" 2414 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2415 lock_type = "EXCLUSIVE" 2416 elif self._match_text_seq("SHARE"): 2417 lock_type = "SHARE" 2418 elif self._match_text_seq("READ"): 2419 lock_type = "READ" 2420 elif self._match_text_seq("WRITE"): 2421 lock_type = "WRITE" 2422 elif self._match_text_seq("CHECKSUM"): 2423 lock_type = "CHECKSUM" 2424 else: 2425 lock_type = None 2426 2427 override = self._match_text_seq("OVERRIDE") 2428 2429 return self.expression( 2430 exp.LockingProperty, 2431 this=this, 2432 kind=kind, 2433 for_or_in=for_or_in, 2434 lock_type=lock_type, 2435 override=override, 2436 ) 2437 2438 def _parse_partition_by(self) -> t.List[exp.Expression]: 2439 if self._match(TokenType.PARTITION_BY): 2440 return self._parse_csv(self._parse_assignment) 2441 return [] 2442 2443 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2444 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2445 if self._match_text_seq("MINVALUE"): 2446 return exp.var("MINVALUE") 2447 if self._match_text_seq("MAXVALUE"): 2448 return exp.var("MAXVALUE") 2449 return self._parse_bitwise() 2450 2451 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2452 expression = None 2453 from_expressions = None 2454 to_expressions = None 2455 2456 if self._match(TokenType.IN): 2457 this = self._parse_wrapped_csv(self._parse_bitwise) 2458 elif self._match(TokenType.FROM): 2459 from_expressions = 
self._parse_wrapped_csv(_parse_partition_bound_expr) 2460 self._match_text_seq("TO") 2461 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2462 elif self._match_text_seq("WITH", "(", "MODULUS"): 2463 this = self._parse_number() 2464 self._match_text_seq(",", "REMAINDER") 2465 expression = self._parse_number() 2466 self._match_r_paren() 2467 else: 2468 self.raise_error("Failed to parse partition bound spec.") 2469 2470 return self.expression( 2471 exp.PartitionBoundSpec, 2472 this=this, 2473 expression=expression, 2474 from_expressions=from_expressions, 2475 to_expressions=to_expressions, 2476 ) 2477 2478 # https://www.postgresql.org/docs/current/sql-createtable.html 2479 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2480 if not self._match_text_seq("OF"): 2481 self._retreat(self._index - 1) 2482 return None 2483 2484 this = self._parse_table(schema=True) 2485 2486 if self._match(TokenType.DEFAULT): 2487 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2488 elif self._match_text_seq("FOR", "VALUES"): 2489 expression = self._parse_partition_bound_spec() 2490 else: 2491 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2492 2493 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2494 2495 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2496 self._match(TokenType.EQ) 2497 return self.expression( 2498 exp.PartitionedByProperty, 2499 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2500 ) 2501 2502 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2503 if self._match_text_seq("AND", "STATISTICS"): 2504 statistics = True 2505 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2506 statistics = False 2507 else: 2508 statistics = None 2509 2510 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2511 2512 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2513 if self._match_text_seq("SQL"): 2514 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2515 return None 2516 2517 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2518 if self._match_text_seq("SQL", "DATA"): 2519 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2520 return None 2521 2522 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2523 if self._match_text_seq("PRIMARY", "INDEX"): 2524 return exp.NoPrimaryIndexProperty() 2525 if self._match_text_seq("SQL"): 2526 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2527 return None 2528 2529 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2530 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2531 return exp.OnCommitProperty() 2532 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2533 return exp.OnCommitProperty(delete=True) 2534 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2535 2536 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2537 if self._match_text_seq("SQL", "DATA"): 2538 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2539 return None 2540 2541 def _parse_distkey(self) -> exp.DistKeyProperty: 2542 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2543 2544 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2545 table = self._parse_table(schema=True) 2546 2547 options = [] 2548 while 
self._match_texts(("INCLUDING", "EXCLUDING")): 2549 this = self._prev.text.upper() 2550 2551 id_var = self._parse_id_var() 2552 if not id_var: 2553 return None 2554 2555 options.append( 2556 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2557 ) 2558 2559 return self.expression(exp.LikeProperty, this=table, expressions=options) 2560 2561 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2562 return self.expression( 2563 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2564 ) 2565 2566 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2567 self._match(TokenType.EQ) 2568 return self.expression( 2569 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2570 ) 2571 2572 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2573 self._match_text_seq("WITH", "CONNECTION") 2574 return self.expression( 2575 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2576 ) 2577 2578 def _parse_returns(self) -> exp.ReturnsProperty: 2579 value: t.Optional[exp.Expression] 2580 null = None 2581 is_table = self._match(TokenType.TABLE) 2582 2583 if is_table: 2584 if self._match(TokenType.LT): 2585 value = self.expression( 2586 exp.Schema, 2587 this="TABLE", 2588 expressions=self._parse_csv(self._parse_struct_types), 2589 ) 2590 if not self._match(TokenType.GT): 2591 self.raise_error("Expecting >") 2592 else: 2593 value = self._parse_schema(exp.var("TABLE")) 2594 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2595 null = True 2596 value = None 2597 else: 2598 value = self._parse_types() 2599 2600 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2601 2602 def _parse_describe(self) -> exp.Describe: 2603 kind = self._match_set(self.CREATABLES) and self._prev.text 2604 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2605 if self._match(TokenType.DOT): 2606 style = None 2607 self._retreat(self._index - 2) 2608 this = self._parse_table(schema=True) 2609 properties = self._parse_properties() 2610 expressions = properties.expressions if properties else None 2611 partition = self._parse_partition() 2612 return self.expression( 2613 exp.Describe, 2614 this=this, 2615 style=style, 2616 kind=kind, 2617 expressions=expressions, 2618 partition=partition, 2619 ) 2620 2621 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2622 kind = self._prev.text.upper() 2623 expressions = [] 2624 2625 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2626 if self._match(TokenType.WHEN): 2627 expression = self._parse_disjunction() 2628 self._match(TokenType.THEN) 2629 else: 2630 expression = None 2631 2632 else_ = self._match(TokenType.ELSE) 2633 2634 if not self._match(TokenType.INTO): 2635 return None 2636 2637 return self.expression( 2638 exp.ConditionalInsert, 2639 this=self.expression( 2640 exp.Insert, 2641 this=self._parse_table(schema=True), 2642 expression=self._parse_derived_table_values(), 2643 ), 2644 expression=expression, 2645 else_=else_, 2646 ) 2647 2648 expression = parse_conditional_insert() 2649 while expression is not None: 2650 expressions.append(expression) 2651 expression = parse_conditional_insert() 2652 2653 return self.expression( 2654 exp.MultitableInserts, 2655 kind=kind, 2656 comments=comments, 2657 expressions=expressions, 2658 source=self._parse_table(), 2659 ) 2660 2661 def _parse_insert(self) 
-> t.Union[exp.Insert, exp.MultitableInserts]: 2662 comments = ensure_list(self._prev_comments) 2663 hint = self._parse_hint() 2664 overwrite = self._match(TokenType.OVERWRITE) 2665 ignore = self._match(TokenType.IGNORE) 2666 local = self._match_text_seq("LOCAL") 2667 alternative = None 2668 is_function = None 2669 2670 if self._match_text_seq("DIRECTORY"): 2671 this: t.Optional[exp.Expression] = self.expression( 2672 exp.Directory, 2673 this=self._parse_var_or_string(), 2674 local=local, 2675 row_format=self._parse_row_format(match_row=True), 2676 ) 2677 else: 2678 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2679 comments += ensure_list(self._prev_comments) 2680 return self._parse_multitable_inserts(comments) 2681 2682 if self._match(TokenType.OR): 2683 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2684 2685 self._match(TokenType.INTO) 2686 comments += ensure_list(self._prev_comments) 2687 self._match(TokenType.TABLE) 2688 is_function = self._match(TokenType.FUNCTION) 2689 2690 this = ( 2691 self._parse_table(schema=True, parse_partition=True) 2692 if not is_function 2693 else self._parse_function() 2694 ) 2695 2696 returning = self._parse_returning() 2697 2698 return self.expression( 2699 exp.Insert, 2700 comments=comments, 2701 hint=hint, 2702 is_function=is_function, 2703 this=this, 2704 stored=self._match_text_seq("STORED") and self._parse_stored(), 2705 by_name=self._match_text_seq("BY", "NAME"), 2706 exists=self._parse_exists(), 2707 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2708 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2709 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2710 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2711 conflict=self._parse_on_conflict(), 2712 returning=returning or self._parse_returning(), 2713 overwrite=overwrite, 2714 alternative=alternative, 2715 ignore=ignore, 2716 source=self._match(TokenType.TABLE) and self._parse_table(), 2717 ) 2718 2719 def _parse_kill(self) -> exp.Kill: 2720 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2721 2722 return self.expression( 2723 exp.Kill, 2724 this=self._parse_primary(), 2725 kind=kind, 2726 ) 2727 2728 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2729 conflict = self._match_text_seq("ON", "CONFLICT") 2730 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2731 2732 if not conflict and not duplicate: 2733 return None 2734 2735 conflict_keys = None 2736 constraint = None 2737 2738 if conflict: 2739 if self._match_text_seq("ON", "CONSTRAINT"): 2740 constraint = self._parse_id_var() 2741 elif self._match(TokenType.L_PAREN): 2742 conflict_keys = self._parse_csv(self._parse_id_var) 2743 self._match_r_paren() 2744 2745 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2746 if self._prev.token_type == TokenType.UPDATE: 2747 self._match(TokenType.SET) 2748 expressions = self._parse_csv(self._parse_equality) 2749 else: 2750 expressions = None 2751 2752 return self.expression( 2753 exp.OnConflict, 2754 duplicate=duplicate, 2755 expressions=expressions, 2756 action=action, 2757 conflict_keys=conflict_keys, 2758 constraint=constraint, 2759 ) 2760 2761 def _parse_returning(self) -> t.Optional[exp.Returning]: 2762 if not self._match(TokenType.RETURNING): 2763 return None 2764 return self.expression( 2765 exp.Returning, 2766 
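            # Illustrative note, not from the source: e.g. in Postgres,
            #
            #     import sqlglot
            #
            #     delete = sqlglot.parse_one(
            #         "DELETE FROM t WHERE a = 1 RETURNING a", read="postgres"
            #     )
            #     delete.args["returning"]  # the exp.Returning node built here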
expressions=self._parse_csv(self._parse_expression), 2767 into=self._match(TokenType.INTO) and self._parse_table_part(), 2768 ) 2769 2770 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2771 if not self._match(TokenType.FORMAT): 2772 return None 2773 return self._parse_row_format() 2774 2775 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2776 index = self._index 2777 with_ = with_ or self._match_text_seq("WITH") 2778 2779 if not self._match(TokenType.SERDE_PROPERTIES): 2780 self._retreat(index) 2781 return None 2782 return self.expression( 2783 exp.SerdeProperties, 2784 **{ # type: ignore 2785 "expressions": self._parse_wrapped_properties(), 2786 "with": with_, 2787 }, 2788 ) 2789 2790 def _parse_row_format( 2791 self, match_row: bool = False 2792 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2793 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2794 return None 2795 2796 if self._match_text_seq("SERDE"): 2797 this = self._parse_string() 2798 2799 serde_properties = self._parse_serde_properties() 2800 2801 return self.expression( 2802 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2803 ) 2804 2805 self._match_text_seq("DELIMITED") 2806 2807 kwargs = {} 2808 2809 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2810 kwargs["fields"] = self._parse_string() 2811 if self._match_text_seq("ESCAPED", "BY"): 2812 kwargs["escaped"] = self._parse_string() 2813 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2814 kwargs["collection_items"] = self._parse_string() 2815 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2816 kwargs["map_keys"] = self._parse_string() 2817 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2818 kwargs["lines"] = self._parse_string() 2819 if self._match_text_seq("NULL", "DEFINED", "AS"): 2820 kwargs["null"] = self._parse_string() 2821 2822 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2823 2824 def _parse_load(self) -> exp.LoadData | exp.Command: 2825 if self._match_text_seq("DATA"): 2826 local = self._match_text_seq("LOCAL") 2827 self._match_text_seq("INPATH") 2828 inpath = self._parse_string() 2829 overwrite = self._match(TokenType.OVERWRITE) 2830 self._match_pair(TokenType.INTO, TokenType.TABLE) 2831 2832 return self.expression( 2833 exp.LoadData, 2834 this=self._parse_table(schema=True), 2835 local=local, 2836 overwrite=overwrite, 2837 inpath=inpath, 2838 partition=self._parse_partition(), 2839 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2840 serde=self._match_text_seq("SERDE") and self._parse_string(), 2841 ) 2842 return self._parse_as_command(self._prev) 2843 2844 def _parse_delete(self) -> exp.Delete: 2845 # This handles MySQL's "Multiple-Table Syntax" 2846 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2847 tables = None 2848 comments = self._prev_comments 2849 if not self._match(TokenType.FROM, advance=False): 2850 tables = self._parse_csv(self._parse_table) or None 2851 2852 returning = self._parse_returning() 2853 2854 return self.expression( 2855 exp.Delete, 2856 comments=comments, 2857 tables=tables, 2858 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2859 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2860 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2861 where=self._parse_where(), 2862 returning=returning or 
self._parse_returning(), 2863 limit=self._parse_limit(), 2864 ) 2865 2866 def _parse_update(self) -> exp.Update: 2867 comments = self._prev_comments 2868 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2869 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2870 returning = self._parse_returning() 2871 return self.expression( 2872 exp.Update, 2873 comments=comments, 2874 **{ # type: ignore 2875 "this": this, 2876 "expressions": expressions, 2877 "from": self._parse_from(joins=True), 2878 "where": self._parse_where(), 2879 "returning": returning or self._parse_returning(), 2880 "order": self._parse_order(), 2881 "limit": self._parse_limit(), 2882 }, 2883 ) 2884 2885 def _parse_uncache(self) -> exp.Uncache: 2886 if not self._match(TokenType.TABLE): 2887 self.raise_error("Expecting TABLE after UNCACHE") 2888 2889 return self.expression( 2890 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2891 ) 2892 2893 def _parse_cache(self) -> exp.Cache: 2894 lazy = self._match_text_seq("LAZY") 2895 self._match(TokenType.TABLE) 2896 table = self._parse_table(schema=True) 2897 2898 options = [] 2899 if self._match_text_seq("OPTIONS"): 2900 self._match_l_paren() 2901 k = self._parse_string() 2902 self._match(TokenType.EQ) 2903 v = self._parse_string() 2904 options = [k, v] 2905 self._match_r_paren() 2906 2907 self._match(TokenType.ALIAS) 2908 return self.expression( 2909 exp.Cache, 2910 this=table, 2911 lazy=lazy, 2912 options=options, 2913 expression=self._parse_select(nested=True), 2914 ) 2915 2916 def _parse_partition(self) -> t.Optional[exp.Partition]: 2917 if not self._match(TokenType.PARTITION): 2918 return None 2919 2920 return self.expression( 2921 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2922 ) 2923 2924 def _parse_value(self) -> t.Optional[exp.Tuple]: 2925 if self._match(TokenType.L_PAREN): 2926 expressions = self._parse_csv(self._parse_expression) 2927 self._match_r_paren() 2928 return self.expression(exp.Tuple, expressions=expressions) 2929 2930 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
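# Illustrative sketch (editor's addition, not part of the upstream module): the branch
# below wraps each bare expression in a single-element exp.Tuple, so VALUES 1, 2 yields
# one column and two rows. Assuming sqlglot's public entry points (parse_one, Expression.find):
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("SELECT * FROM (VALUES 1, 2) AS t(x)")
#     rows = ast.find(exp.Values).expressions
#     print([row.sql() for row in rows])  # roughly ['(1)', '(2)']: two single-column rows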
2931 expression = self._parse_expression() 2932 if expression: 2933 return self.expression(exp.Tuple, expressions=[expression]) 2934 return None 2935 2936 def _parse_projections(self) -> t.List[exp.Expression]: 2937 return self._parse_expressions() 2938 2939 def _parse_select( 2940 self, 2941 nested: bool = False, 2942 table: bool = False, 2943 parse_subquery_alias: bool = True, 2944 parse_set_operation: bool = True, 2945 ) -> t.Optional[exp.Expression]: 2946 cte = self._parse_with() 2947 2948 if cte: 2949 this = self._parse_statement() 2950 2951 if not this: 2952 self.raise_error("Failed to parse any statement following CTE") 2953 return cte 2954 2955 if "with" in this.arg_types: 2956 this.set("with", cte) 2957 else: 2958 self.raise_error(f"{this.key} does not support CTE") 2959 this = cte 2960 2961 return this 2962 2963 # DuckDB supports a leading FROM clause, e.g. FROM x [SELECT ...] 2964 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2965 2966 if self._match(TokenType.SELECT): 2967 comments = self._prev_comments 2968 2969 hint = self._parse_hint() 2970 2971 if self._next and not self._next.token_type == TokenType.DOT: 2972 all_ = self._match(TokenType.ALL) 2973 distinct = self._match_set(self.DISTINCT_TOKENS) 2974 else: 2975 all_, distinct = None, None 2976 2977 kind = ( 2978 self._match(TokenType.ALIAS) 2979 and self._match_texts(("STRUCT", "VALUE")) 2980 and self._prev.text.upper() 2981 ) 2982 2983 if distinct: 2984 distinct = self.expression( 2985 exp.Distinct, 2986 on=self._parse_value() if self._match(TokenType.ON) else None, 2987 ) 2988 2989 if all_ and distinct: 2990 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2991 2992 operation_modifiers = [] 2993 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 2994 operation_modifiers.append(exp.var(self._prev.text.upper())) 2995 2996 limit = self._parse_limit(top=True) 2997 projections = self._parse_projections() 2998 2999 this = self.expression( 3000 exp.Select, 3001 kind=kind, 3002 hint=hint, 3003 distinct=distinct, 3004 expressions=projections, 3005 limit=limit, 3006 operation_modifiers=operation_modifiers or None, 3007 ) 3008 this.comments = comments 3009 3010 into = self._parse_into() 3011 if into: 3012 this.set("into", into) 3013 3014 if not from_: 3015 from_ = self._parse_from() 3016 3017 if from_: 3018 this.set("from", from_) 3019 3020 this = self._parse_query_modifiers(this) 3021 elif (table or nested) and self._match(TokenType.L_PAREN): 3022 if self._match(TokenType.PIVOT): 3023 this = self._parse_simplified_pivot() 3024 elif self._match(TokenType.FROM): 3025 this = exp.select("*").from_( 3026 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3027 ) 3028 else: 3029 this = ( 3030 self._parse_table() 3031 if table 3032 else self._parse_select(nested=True, parse_set_operation=False) 3033 ) 3034 3035 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers 3036 # in case a modifier (e.g.
join) follows 3037 if table and isinstance(this, exp.Values) and this.alias: 3038 alias = this.args["alias"].pop() 3039 this = exp.Table(this=this, alias=alias) 3040 3041 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3042 3043 self._match_r_paren() 3044 3045 # We return early here so that the UNION isn't attached to the subquery by the 3046 # following call to _parse_set_operations, but instead becomes the parent node 3047 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3048 elif self._match(TokenType.VALUES, advance=False): 3049 this = self._parse_derived_table_values() 3050 elif from_: 3051 this = exp.select("*").from_(from_.this, copy=False) 3052 elif self._match(TokenType.SUMMARIZE): 3053 table = self._match(TokenType.TABLE) 3054 this = self._parse_select() or self._parse_string() or self._parse_table() 3055 return self.expression(exp.Summarize, this=this, table=table) 3056 elif self._match(TokenType.DESCRIBE): 3057 this = self._parse_describe() 3058 elif self._match_text_seq("STREAM"): 3059 this = self.expression(exp.Stream, this=self._parse_function()) 3060 else: 3061 this = None 3062 3063 return self._parse_set_operations(this) if parse_set_operation else this 3064 3065 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3066 if not skip_with_token and not self._match(TokenType.WITH): 3067 return None 3068 3069 comments = self._prev_comments 3070 recursive = self._match(TokenType.RECURSIVE) 3071 3072 last_comments = None 3073 expressions = [] 3074 while True: 3075 expressions.append(self._parse_cte()) 3076 if last_comments: 3077 expressions[-1].add_comments(last_comments) 3078 3079 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3080 break 3081 else: 3082 self._match(TokenType.WITH) 3083 3084 last_comments = self._prev_comments 3085 3086 return self.expression( 3087 exp.With, comments=comments, expressions=expressions, recursive=recursive 3088 ) 3089 3090 def _parse_cte(self) -> exp.CTE: 3091 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3092 if not alias or not alias.this: 3093 self.raise_error("Expected CTE to have alias") 3094 3095 self._match(TokenType.ALIAS) 3096 comments = self._prev_comments 3097 3098 if self._match_text_seq("NOT", "MATERIALIZED"): 3099 materialized = False 3100 elif self._match_text_seq("MATERIALIZED"): 3101 materialized = True 3102 else: 3103 materialized = None 3104 3105 return self.expression( 3106 exp.CTE, 3107 this=self._parse_wrapped(self._parse_statement), 3108 alias=alias, 3109 materialized=materialized, 3110 comments=comments, 3111 ) 3112 3113 def _parse_table_alias( 3114 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3115 ) -> t.Optional[exp.TableAlias]: 3116 any_token = self._match(TokenType.ALIAS) 3117 alias = ( 3118 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3119 or self._parse_string_as_identifier() 3120 ) 3121 3122 index = self._index 3123 if self._match(TokenType.L_PAREN): 3124 columns = self._parse_csv(self._parse_function_parameter) 3125 self._match_r_paren() if columns else self._retreat(index) 3126 else: 3127 columns = None 3128 3129 if not alias and not columns: 3130 return None 3131 3132 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3133 3134 # We bubble up comments from the Identifier to the TableAlias 3135 if isinstance(alias, exp.Identifier): 3136 table_alias.add_comments(alias.pop_comments()) 3137 3138 return table_alias 3139 3140 def
_parse_subquery( 3141 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3142 ) -> t.Optional[exp.Subquery]: 3143 if not this: 3144 return None 3145 3146 return self.expression( 3147 exp.Subquery, 3148 this=this, 3149 pivots=self._parse_pivots(), 3150 alias=self._parse_table_alias() if parse_alias else None, 3151 sample=self._parse_table_sample(), 3152 ) 3153 3154 def _implicit_unnests_to_explicit(self, this: E) -> E: 3155 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3156 3157 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3158 for i, join in enumerate(this.args.get("joins") or []): 3159 table = join.this 3160 normalized_table = table.copy() 3161 normalized_table.meta["maybe_column"] = True 3162 normalized_table = _norm(normalized_table, dialect=self.dialect) 3163 3164 if isinstance(table, exp.Table) and not join.args.get("on"): 3165 if normalized_table.parts[0].name in refs: 3166 table_as_column = table.to_column() 3167 unnest = exp.Unnest(expressions=[table_as_column]) 3168 3169 # Table.to_column creates a parent Alias node that we want to convert to 3170 # a TableAlias and attach to the Unnest, so it matches the parser's output 3171 if isinstance(table.args.get("alias"), exp.TableAlias): 3172 table_as_column.replace(table_as_column.this) 3173 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3174 3175 table.replace(unnest) 3176 3177 refs.add(normalized_table.alias_or_name) 3178 3179 return this 3180 3181 def _parse_query_modifiers( 3182 self, this: t.Optional[exp.Expression] 3183 ) -> t.Optional[exp.Expression]: 3184 if isinstance(this, (exp.Query, exp.Table)): 3185 for join in self._parse_joins(): 3186 this.append("joins", join) 3187 for lateral in iter(self._parse_lateral, None): 3188 this.append("laterals", lateral) 3189 3190 while True: 3191 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3192 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3193 key, expression = parser(self) 3194 3195 if expression: 3196 this.set(key, expression) 3197 if key == "limit": 3198 offset = expression.args.pop("offset", None) 3199 3200 if offset: 3201 offset = exp.Offset(expression=offset) 3202 this.set("offset", offset) 3203 3204 limit_by_expressions = expression.expressions 3205 expression.set("expressions", None) 3206 offset.set("expressions", limit_by_expressions) 3207 continue 3208 break 3209 3210 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3211 this = self._implicit_unnests_to_explicit(this) 3212 3213 return this 3214 3215 def _parse_hint(self) -> t.Optional[exp.Hint]: 3216 if self._match(TokenType.HINT): 3217 hints = [] 3218 for hint in iter( 3219 lambda: self._parse_csv( 3220 lambda: self._parse_function() or self._parse_var(upper=True) 3221 ), 3222 [], 3223 ): 3224 hints.extend(hint) 3225 3226 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3227 self.raise_error("Expected */ after HINT") 3228 3229 return self.expression(exp.Hint, expressions=hints) 3230 3231 return None 3232 3233 def _parse_into(self) -> t.Optional[exp.Into]: 3234 if not self._match(TokenType.INTO): 3235 return None 3236 3237 temp = self._match(TokenType.TEMPORARY) 3238 unlogged = self._match_text_seq("UNLOGGED") 3239 self._match(TokenType.TABLE) 3240 3241 return self.expression( 3242 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3243 ) 3244 3245 def _parse_from( 3246 self, joins: bool = False, skip_from_token: bool = False 
3247 ) -> t.Optional[exp.From]: 3248 if not skip_from_token and not self._match(TokenType.FROM): 3249 return None 3250 3251 return self.expression( 3252 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3253 ) 3254 3255 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3256 return self.expression( 3257 exp.MatchRecognizeMeasure, 3258 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3259 this=self._parse_expression(), 3260 ) 3261 3262 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3263 if not self._match(TokenType.MATCH_RECOGNIZE): 3264 return None 3265 3266 self._match_l_paren() 3267 3268 partition = self._parse_partition_by() 3269 order = self._parse_order() 3270 3271 measures = ( 3272 self._parse_csv(self._parse_match_recognize_measure) 3273 if self._match_text_seq("MEASURES") 3274 else None 3275 ) 3276 3277 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3278 rows = exp.var("ONE ROW PER MATCH") 3279 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3280 text = "ALL ROWS PER MATCH" 3281 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3282 text += " SHOW EMPTY MATCHES" 3283 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3284 text += " OMIT EMPTY MATCHES" 3285 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3286 text += " WITH UNMATCHED ROWS" 3287 rows = exp.var(text) 3288 else: 3289 rows = None 3290 3291 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3292 text = "AFTER MATCH SKIP" 3293 if self._match_text_seq("PAST", "LAST", "ROW"): 3294 text += " PAST LAST ROW" 3295 elif self._match_text_seq("TO", "NEXT", "ROW"): 3296 text += " TO NEXT ROW" 3297 elif self._match_text_seq("TO", "FIRST"): 3298 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3299 elif self._match_text_seq("TO", "LAST"): 3300 text += f" TO LAST {self._advance_any().text}" # type: ignore 3301 after = exp.var(text) 3302 else: 3303 after = None 3304 3305 if self._match_text_seq("PATTERN"): 3306 self._match_l_paren() 3307 3308 if not self._curr: 3309 self.raise_error("Expecting )", self._curr) 3310 3311 paren = 1 3312 start = self._curr 3313 3314 while self._curr and paren > 0: 3315 if self._curr.token_type == TokenType.L_PAREN: 3316 paren += 1 3317 if self._curr.token_type == TokenType.R_PAREN: 3318 paren -= 1 3319 3320 end = self._prev 3321 self._advance() 3322 3323 if paren > 0: 3324 self.raise_error("Expecting )", self._curr) 3325 3326 pattern = exp.var(self._find_sql(start, end)) 3327 else: 3328 pattern = None 3329 3330 define = ( 3331 self._parse_csv(self._parse_name_as_expression) 3332 if self._match_text_seq("DEFINE") 3333 else None 3334 ) 3335 3336 self._match_r_paren() 3337 3338 return self.expression( 3339 exp.MatchRecognize, 3340 partition_by=partition, 3341 order=order, 3342 measures=measures, 3343 rows=rows, 3344 after=after, 3345 pattern=pattern, 3346 define=define, 3347 alias=self._parse_table_alias(), 3348 ) 3349 3350 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3351 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3352 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3353 cross_apply = False 3354 3355 if cross_apply is not None: 3356 this = self._parse_select(table=True) 3357 view = None 3358 outer = None 3359 elif self._match(TokenType.LATERAL): 3360 this = self._parse_select(table=True) 3361 view = self._match(TokenType.VIEW) 3362 outer = self._match(TokenType.OUTER) 3363 else: 3364 return None 3365 3366 if 
not this: 3367 this = ( 3368 self._parse_unnest() 3369 or self._parse_function() 3370 or self._parse_id_var(any_token=False) 3371 ) 3372 3373 while self._match(TokenType.DOT): 3374 this = exp.Dot( 3375 this=this, 3376 expression=self._parse_function() or self._parse_id_var(any_token=False), 3377 ) 3378 3379 if view: 3380 table = self._parse_id_var(any_token=False) 3381 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3382 table_alias: t.Optional[exp.TableAlias] = self.expression( 3383 exp.TableAlias, this=table, columns=columns 3384 ) 3385 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3386 # We move the alias from the lateral's child node to the lateral itself 3387 table_alias = this.args["alias"].pop() 3388 else: 3389 table_alias = self._parse_table_alias() 3390 3391 return self.expression( 3392 exp.Lateral, 3393 this=this, 3394 view=view, 3395 outer=outer, 3396 alias=table_alias, 3397 cross_apply=cross_apply, 3398 ) 3399 3400 def _parse_join_parts( 3401 self, 3402 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3403 return ( 3404 self._match_set(self.JOIN_METHODS) and self._prev, 3405 self._match_set(self.JOIN_SIDES) and self._prev, 3406 self._match_set(self.JOIN_KINDS) and self._prev, 3407 ) 3408 3409 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3410 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3411 this = self._parse_column() 3412 if isinstance(this, exp.Column): 3413 return this.this 3414 return this 3415 3416 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3417 3418 def _parse_join( 3419 self, skip_join_token: bool = False, parse_bracket: bool = False 3420 ) -> t.Optional[exp.Join]: 3421 if self._match(TokenType.COMMA): 3422 return self.expression(exp.Join, this=self._parse_table()) 3423 3424 index = self._index 3425 method, side, kind = self._parse_join_parts() 3426 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3427 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3428 3429 if not skip_join_token and not join: 3430 self._retreat(index) 3431 kind = None 3432 method = None 3433 side = None 3434 3435 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3436 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3437 3438 if not skip_join_token and not join and not outer_apply and not cross_apply: 3439 return None 3440 3441 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3442 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3443 kwargs["expressions"] = self._parse_csv( 3444 lambda: self._parse_table(parse_bracket=parse_bracket) 3445 ) 3446 3447 if method: 3448 kwargs["method"] = method.text 3449 if side: 3450 kwargs["side"] = side.text 3451 if kind: 3452 kwargs["kind"] = kind.text 3453 if hint: 3454 kwargs["hint"] = hint 3455 3456 if self._match(TokenType.MATCH_CONDITION): 3457 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3458 3459 if self._match(TokenType.ON): 3460 kwargs["on"] = self._parse_assignment() 3461 elif self._match(TokenType.USING): 3462 kwargs["using"] = self._parse_using_identifiers() 3463 elif ( 3464 not (outer_apply or cross_apply) 3465 and not isinstance(kwargs["this"], exp.Unnest) 3466 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3467 ): 3468 index = self._index 3469 joins: t.Optional[list] = 
list(self._parse_joins()) 3470 3471 if joins and self._match(TokenType.ON): 3472 kwargs["on"] = self._parse_assignment() 3473 elif joins and self._match(TokenType.USING): 3474 kwargs["using"] = self._parse_using_identifiers() 3475 else: 3476 joins = None 3477 self._retreat(index) 3478 3479 kwargs["this"].set("joins", joins if joins else None) 3480 3481 comments = [c for token in (method, side, kind) if token for c in token.comments] 3482 return self.expression(exp.Join, comments=comments, **kwargs) 3483 3484 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3485 this = self._parse_assignment() 3486 3487 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3488 return this 3489 3490 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3491 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3492 3493 return this 3494 3495 def _parse_index_params(self) -> exp.IndexParameters: 3496 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3497 3498 if self._match(TokenType.L_PAREN, advance=False): 3499 columns = self._parse_wrapped_csv(self._parse_with_operator) 3500 else: 3501 columns = None 3502 3503 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3504 partition_by = self._parse_partition_by() 3505 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3506 tablespace = ( 3507 self._parse_var(any_token=True) 3508 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3509 else None 3510 ) 3511 where = self._parse_where() 3512 3513 on = self._parse_field() if self._match(TokenType.ON) else None 3514 3515 return self.expression( 3516 exp.IndexParameters, 3517 using=using, 3518 columns=columns, 3519 include=include, 3520 partition_by=partition_by, 3521 where=where, 3522 with_storage=with_storage, 3523 tablespace=tablespace, 3524 on=on, 3525 ) 3526 3527 def _parse_index( 3528 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3529 ) -> t.Optional[exp.Index]: 3530 if index or anonymous: 3531 unique = None 3532 primary = None 3533 amp = None 3534 3535 self._match(TokenType.ON) 3536 self._match(TokenType.TABLE) # hive 3537 table = self._parse_table_parts(schema=True) 3538 else: 3539 unique = self._match(TokenType.UNIQUE) 3540 primary = self._match_text_seq("PRIMARY") 3541 amp = self._match_text_seq("AMP") 3542 3543 if not self._match(TokenType.INDEX): 3544 return None 3545 3546 index = self._parse_id_var() 3547 table = None 3548 3549 params = self._parse_index_params() 3550 3551 return self.expression( 3552 exp.Index, 3553 this=index, 3554 table=table, 3555 unique=unique, 3556 primary=primary, 3557 amp=amp, 3558 params=params, 3559 ) 3560 3561 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3562 hints: t.List[exp.Expression] = [] 3563 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3564 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3565 hints.append( 3566 self.expression( 3567 exp.WithTableHint, 3568 expressions=self._parse_csv( 3569 lambda: self._parse_function() or self._parse_var(any_token=True) 3570 ), 3571 ) 3572 ) 3573 self._match_r_paren() 3574 else: 3575 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3576 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3577 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3578 3579 self._match_set((TokenType.INDEX, TokenType.KEY)) 3580 if self._match(TokenType.FOR): 3581 
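# Illustrative sketch (editor's addition, not part of the upstream module): this branch
# records the optional FOR JOIN / FOR ORDER BY / FOR GROUP BY target of a MySQL index
# hint before the wrapped index list is parsed. Assuming sqlglot's public API:
#
#     import sqlglot
#     from sqlglot import exp
#
#     t = sqlglot.parse_one("SELECT * FROM t USE INDEX FOR JOIN (i1)", read="mysql")
#     hint = t.find(exp.IndexTableHint)
#     print(hint.this, hint.text("target"))  # roughly: USE JOIN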
hint.set("target", self._advance_any() and self._prev.text.upper()) 3582 3583 hint.set("expressions", self._parse_wrapped_id_vars()) 3584 hints.append(hint) 3585 3586 return hints or None 3587 3588 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3589 return ( 3590 (not schema and self._parse_function(optional_parens=False)) 3591 or self._parse_id_var(any_token=False) 3592 or self._parse_string_as_identifier() 3593 or self._parse_placeholder() 3594 ) 3595 3596 def _parse_table_parts( 3597 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3598 ) -> exp.Table: 3599 catalog = None 3600 db = None 3601 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3602 3603 while self._match(TokenType.DOT): 3604 if catalog: 3605 # This allows nesting the table in arbitrarily many dot expressions if needed 3606 table = self.expression( 3607 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3608 ) 3609 else: 3610 catalog = db 3611 db = table 3612 # "" used for tsql FROM a..b case 3613 table = self._parse_table_part(schema=schema) or "" 3614 3615 if ( 3616 wildcard 3617 and self._is_connected() 3618 and (isinstance(table, exp.Identifier) or not table) 3619 and self._match(TokenType.STAR) 3620 ): 3621 if isinstance(table, exp.Identifier): 3622 table.args["this"] += "*" 3623 else: 3624 table = exp.Identifier(this="*") 3625 3626 # We bubble up comments from the Identifier to the Table 3627 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3628 3629 if is_db_reference: 3630 catalog = db 3631 db = table 3632 table = None 3633 3634 if not table and not is_db_reference: 3635 self.raise_error(f"Expected table name but got {self._curr}") 3636 if not db and is_db_reference: 3637 self.raise_error(f"Expected database name but got {self._curr}") 3638 3639 table = self.expression( 3640 exp.Table, 3641 comments=comments, 3642 this=table, 3643 db=db, 3644 catalog=catalog, 3645 ) 3646 3647 changes = self._parse_changes() 3648 if changes: 3649 table.set("changes", changes) 3650 3651 at_before = self._parse_historical_data() 3652 if at_before: 3653 table.set("when", at_before) 3654 3655 pivots = self._parse_pivots() 3656 if pivots: 3657 table.set("pivots", pivots) 3658 3659 return table 3660 3661 def _parse_table( 3662 self, 3663 schema: bool = False, 3664 joins: bool = False, 3665 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3666 parse_bracket: bool = False, 3667 is_db_reference: bool = False, 3668 parse_partition: bool = False, 3669 ) -> t.Optional[exp.Expression]: 3670 lateral = self._parse_lateral() 3671 if lateral: 3672 return lateral 3673 3674 unnest = self._parse_unnest() 3675 if unnest: 3676 return unnest 3677 3678 values = self._parse_derived_table_values() 3679 if values: 3680 return values 3681 3682 subquery = self._parse_select(table=True) 3683 if subquery: 3684 if not subquery.args.get("pivots"): 3685 subquery.set("pivots", self._parse_pivots()) 3686 return subquery 3687 3688 bracket = parse_bracket and self._parse_bracket(None) 3689 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3690 3691 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3692 self._parse_table 3693 ) 3694 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3695 3696 only = self._match(TokenType.ONLY) 3697 3698 this = t.cast( 3699 exp.Expression, 3700 bracket 3701 or rows_from 3702 or self._parse_bracket( 3703 
self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3704 ), 3705 ) 3706 3707 if only: 3708 this.set("only", only) 3709 3710 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3711 self._match_text_seq("*") 3712 3713 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3714 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3715 this.set("partition", self._parse_partition()) 3716 3717 if schema: 3718 return self._parse_schema(this=this) 3719 3720 version = self._parse_version() 3721 3722 if version: 3723 this.set("version", version) 3724 3725 if self.dialect.ALIAS_POST_TABLESAMPLE: 3726 this.set("sample", self._parse_table_sample()) 3727 3728 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3729 if alias: 3730 this.set("alias", alias) 3731 3732 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3733 return self.expression( 3734 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3735 ) 3736 3737 this.set("hints", self._parse_table_hints()) 3738 3739 if not this.args.get("pivots"): 3740 this.set("pivots", self._parse_pivots()) 3741 3742 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3743 this.set("sample", self._parse_table_sample()) 3744 3745 if joins: 3746 for join in self._parse_joins(): 3747 this.append("joins", join) 3748 3749 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3750 this.set("ordinality", True) 3751 this.set("alias", self._parse_table_alias()) 3752 3753 return this 3754 3755 def _parse_version(self) -> t.Optional[exp.Version]: 3756 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3757 this = "TIMESTAMP" 3758 elif self._match(TokenType.VERSION_SNAPSHOT): 3759 this = "VERSION" 3760 else: 3761 return None 3762 3763 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3764 kind = self._prev.text.upper() 3765 start = self._parse_bitwise() 3766 self._match_texts(("TO", "AND")) 3767 end = self._parse_bitwise() 3768 expression: t.Optional[exp.Expression] = self.expression( 3769 exp.Tuple, expressions=[start, end] 3770 ) 3771 elif self._match_text_seq("CONTAINED", "IN"): 3772 kind = "CONTAINED IN" 3773 expression = self.expression( 3774 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3775 ) 3776 elif self._match(TokenType.ALL): 3777 kind = "ALL" 3778 expression = None 3779 else: 3780 self._match_text_seq("AS", "OF") 3781 kind = "AS OF" 3782 expression = self._parse_type() 3783 3784 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3785 3786 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3787 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3788 index = self._index 3789 historical_data = None 3790 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3791 this = self._prev.text.upper() 3792 kind = ( 3793 self._match(TokenType.L_PAREN) 3794 and self._match_texts(self.HISTORICAL_DATA_KIND) 3795 and self._prev.text.upper() 3796 ) 3797 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3798 3799 if expression: 3800 self._match_r_paren() 3801 historical_data = self.expression( 3802 exp.HistoricalData, this=this, kind=kind, expression=expression 3803 ) 3804 else: 3805 self._retreat(index) 3806 3807 return historical_data 3808 3809 def _parse_changes(self) -> t.Optional[exp.Changes]: 3810 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3811 return None 3812 3813 information = 
self._parse_var(any_token=True) 3814 self._match_r_paren() 3815 3816 return self.expression( 3817 exp.Changes, 3818 information=information, 3819 at_before=self._parse_historical_data(), 3820 end=self._parse_historical_data(), 3821 ) 3822 3823 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3824 if not self._match(TokenType.UNNEST): 3825 return None 3826 3827 expressions = self._parse_wrapped_csv(self._parse_equality) 3828 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3829 3830 alias = self._parse_table_alias() if with_alias else None 3831 3832 if alias: 3833 if self.dialect.UNNEST_COLUMN_ONLY: 3834 if alias.args.get("columns"): 3835 self.raise_error("Unexpected extra column alias in unnest.") 3836 3837 alias.set("columns", [alias.this]) 3838 alias.set("this", None) 3839 3840 columns = alias.args.get("columns") or [] 3841 if offset and len(expressions) < len(columns): 3842 offset = columns.pop() 3843 3844 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3845 self._match(TokenType.ALIAS) 3846 offset = self._parse_id_var( 3847 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3848 ) or exp.to_identifier("offset") 3849 3850 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3851 3852 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3853 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3854 if not is_derived and not ( 3855 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3856 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3857 ): 3858 return None 3859 3860 expressions = self._parse_csv(self._parse_value) 3861 alias = self._parse_table_alias() 3862 3863 if is_derived: 3864 self._match_r_paren() 3865 3866 return self.expression( 3867 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3868 ) 3869 3870 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3871 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3872 as_modifier and self._match_text_seq("USING", "SAMPLE") 3873 ): 3874 return None 3875 3876 bucket_numerator = None 3877 bucket_denominator = None 3878 bucket_field = None 3879 percent = None 3880 size = None 3881 seed = None 3882 3883 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3884 matched_l_paren = self._match(TokenType.L_PAREN) 3885 3886 if self.TABLESAMPLE_CSV: 3887 num = None 3888 expressions = self._parse_csv(self._parse_primary) 3889 else: 3890 expressions = None 3891 num = ( 3892 self._parse_factor() 3893 if self._match(TokenType.NUMBER, advance=False) 3894 else self._parse_primary() or self._parse_placeholder() 3895 ) 3896 3897 if self._match_text_seq("BUCKET"): 3898 bucket_numerator = self._parse_number() 3899 self._match_text_seq("OUT", "OF") 3900 bucket_denominator = self._parse_number() 3901 self._match(TokenType.ON) 3902 bucket_field = self._parse_field() 3903 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3904 percent = num 3905 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3906 size = num 3907 else: 3908 percent = num 3909 3910 if matched_l_paren: 3911 self._match_r_paren() 3912 3913 if self._match(TokenType.L_PAREN): 3914 method = self._parse_var(upper=True) 3915 seed = self._match(TokenType.COMMA) and self._parse_number() 3916 self._match_r_paren() 3917 elif self._match_texts(("SEED", "REPEATABLE")): 3918 seed =
self._parse_wrapped(self._parse_number) 3919 3920 if not method and self.DEFAULT_SAMPLING_METHOD: 3921 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3922 3923 return self.expression( 3924 exp.TableSample, 3925 expressions=expressions, 3926 method=method, 3927 bucket_numerator=bucket_numerator, 3928 bucket_denominator=bucket_denominator, 3929 bucket_field=bucket_field, 3930 percent=percent, 3931 size=size, 3932 seed=seed, 3933 ) 3934 3935 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3936 return list(iter(self._parse_pivot, None)) or None 3937 3938 def _parse_joins(self) -> t.Iterator[exp.Join]: 3939 return iter(self._parse_join, None) 3940 3941 # https://duckdb.org/docs/sql/statements/pivot 3942 def _parse_simplified_pivot(self) -> exp.Pivot: 3943 def _parse_on() -> t.Optional[exp.Expression]: 3944 this = self._parse_bitwise() 3945 return self._parse_in(this) if self._match(TokenType.IN) else this 3946 3947 this = self._parse_table() 3948 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3949 using = self._match(TokenType.USING) and self._parse_csv( 3950 lambda: self._parse_alias(self._parse_function()) 3951 ) 3952 group = self._parse_group() 3953 return self.expression( 3954 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3955 ) 3956 3957 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3958 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3959 this = self._parse_select_or_expression() 3960 3961 self._match(TokenType.ALIAS) 3962 alias = self._parse_bitwise() 3963 if alias: 3964 if isinstance(alias, exp.Column) and not alias.db: 3965 alias = alias.this 3966 return self.expression(exp.PivotAlias, this=this, alias=alias) 3967 3968 return this 3969 3970 value = self._parse_column() 3971 3972 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3973 self.raise_error("Expecting IN (") 3974 3975 if self._match(TokenType.ANY): 3976 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3977 else: 3978 exprs = self._parse_csv(_parse_aliased_expression) 3979 3980 self._match_r_paren() 3981 return self.expression(exp.In, this=value, expressions=exprs) 3982 3983 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3984 index = self._index 3985 include_nulls = None 3986 3987 if self._match(TokenType.PIVOT): 3988 unpivot = False 3989 elif self._match(TokenType.UNPIVOT): 3990 unpivot = True 3991 3992 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3993 if self._match_text_seq("INCLUDE", "NULLS"): 3994 include_nulls = True 3995 elif self._match_text_seq("EXCLUDE", "NULLS"): 3996 include_nulls = False 3997 else: 3998 return None 3999 4000 expressions = [] 4001 4002 if not self._match(TokenType.L_PAREN): 4003 self._retreat(index) 4004 return None 4005 4006 if unpivot: 4007 expressions = self._parse_csv(self._parse_column) 4008 else: 4009 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4010 4011 if not expressions: 4012 self.raise_error("Failed to parse PIVOT's aggregation list") 4013 4014 if not self._match(TokenType.FOR): 4015 self.raise_error("Expecting FOR") 4016 4017 field = self._parse_pivot_in() 4018 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4019 self._parse_bitwise 4020 ) 4021 4022 self._match_r_paren() 4023 4024 pivot = self.expression( 4025 exp.Pivot, 4026 expressions=expressions, 4027 field=field, 4028 unpivot=unpivot, 4029 include_nulls=include_nulls, 4030 
default_on_null=default_on_null, 4031 ) 4032 4033 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4034 pivot.set("alias", self._parse_table_alias()) 4035 4036 if not unpivot: 4037 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4038 4039 columns: t.List[exp.Expression] = [] 4040 for fld in pivot.args["field"].expressions: 4041 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4042 for name in names: 4043 if self.PREFIXED_PIVOT_COLUMNS: 4044 name = f"{name}_{field_name}" if name else field_name 4045 else: 4046 name = f"{field_name}_{name}" if name else field_name 4047 4048 columns.append(exp.to_identifier(name)) 4049 4050 pivot.set("columns", columns) 4051 4052 return pivot 4053 4054 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4055 return [agg.alias for agg in aggregations] 4056 4057 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4058 if not skip_where_token and not self._match(TokenType.PREWHERE): 4059 return None 4060 4061 return self.expression( 4062 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4063 ) 4064 4065 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4066 if not skip_where_token and not self._match(TokenType.WHERE): 4067 return None 4068 4069 return self.expression( 4070 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4071 ) 4072 4073 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4074 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4075 return None 4076 4077 elements: t.Dict[str, t.Any] = defaultdict(list) 4078 4079 if self._match(TokenType.ALL): 4080 elements["all"] = True 4081 elif self._match(TokenType.DISTINCT): 4082 elements["all"] = False 4083 4084 while True: 4085 index = self._index 4086 4087 elements["expressions"].extend( 4088 self._parse_csv( 4089 lambda: None 4090 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4091 else self._parse_assignment() 4092 ) 4093 ) 4094 4095 before_with_index = self._index 4096 with_prefix = self._match(TokenType.WITH) 4097 4098 if self._match(TokenType.ROLLUP): 4099 elements["rollup"].append( 4100 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4101 ) 4102 elif self._match(TokenType.CUBE): 4103 elements["cube"].append( 4104 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4105 ) 4106 elif self._match(TokenType.GROUPING_SETS): 4107 elements["grouping_sets"].append( 4108 self.expression( 4109 exp.GroupingSets, 4110 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4111 ) 4112 ) 4113 elif self._match_text_seq("TOTALS"): 4114 elements["totals"] = True # type: ignore 4115 4116 if before_with_index <= self._index <= before_with_index + 1: 4117 self._retreat(before_with_index) 4118 break 4119 4120 if index == self._index: 4121 break 4122 4123 return self.expression(exp.Group, **elements) # type: ignore 4124 4125 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4126 return self.expression( 4127 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4128 ) 4129 4130 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4131 if self._match(TokenType.L_PAREN): 4132 grouping_set = self._parse_csv(self._parse_column) 4133 self._match_r_paren() 4134 return self.expression(exp.Tuple, expressions=grouping_set) 4135 4136 
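# Illustrative sketch (editor's addition, not part of the upstream module): a grouping
# set is either a parenthesized column list, handled above as an exp.Tuple, or the bare
# column parsed below. Assuming the default dialect and sqlglot's public API:
#
#     import sqlglot
#     from sqlglot import exp
#
#     q = sqlglot.parse_one("SELECT a, b FROM t GROUP BY GROUPING SETS ((a, b), a)")
#     sets = q.find(exp.GroupingSets).expressions
#     print([s.sql() for s in sets])  # roughly ['(a, b)', 'a']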
return self._parse_column() 4137 4138 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4139 if not skip_having_token and not self._match(TokenType.HAVING): 4140 return None 4141 return self.expression(exp.Having, this=self._parse_assignment()) 4142 4143 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4144 if not self._match(TokenType.QUALIFY): 4145 return None 4146 return self.expression(exp.Qualify, this=self._parse_assignment()) 4147 4148 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4149 if skip_start_token: 4150 start = None 4151 elif self._match(TokenType.START_WITH): 4152 start = self._parse_assignment() 4153 else: 4154 return None 4155 4156 self._match(TokenType.CONNECT_BY) 4157 nocycle = self._match_text_seq("NOCYCLE") 4158 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4159 exp.Prior, this=self._parse_bitwise() 4160 ) 4161 connect = self._parse_assignment() 4162 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4163 4164 if not start and self._match(TokenType.START_WITH): 4165 start = self._parse_assignment() 4166 4167 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4168 4169 def _parse_name_as_expression(self) -> exp.Alias: 4170 return self.expression( 4171 exp.Alias, 4172 alias=self._parse_id_var(any_token=True), 4173 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4174 ) 4175 4176 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4177 if self._match_text_seq("INTERPOLATE"): 4178 return self._parse_wrapped_csv(self._parse_name_as_expression) 4179 return None 4180 4181 def _parse_order( 4182 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4183 ) -> t.Optional[exp.Expression]: 4184 siblings = None 4185 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4186 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4187 return this 4188 4189 siblings = True 4190 4191 return self.expression( 4192 exp.Order, 4193 this=this, 4194 expressions=self._parse_csv(self._parse_ordered), 4195 siblings=siblings, 4196 ) 4197 4198 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4199 if not self._match(token): 4200 return None 4201 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4202 4203 def _parse_ordered( 4204 self, parse_method: t.Optional[t.Callable] = None 4205 ) -> t.Optional[exp.Ordered]: 4206 this = parse_method() if parse_method else self._parse_assignment() 4207 if not this: 4208 return None 4209 4210 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4211 this = exp.var("ALL") 4212 4213 asc = self._match(TokenType.ASC) 4214 desc = self._match(TokenType.DESC) or (asc and False) 4215 4216 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4217 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4218 4219 nulls_first = is_nulls_first or False 4220 explicitly_null_ordered = is_nulls_first or is_nulls_last 4221 4222 if ( 4223 not explicitly_null_ordered 4224 and ( 4225 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4226 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4227 ) 4228 and self.dialect.NULL_ORDERING != "nulls_are_last" 4229 ): 4230 nulls_first = True 4231 4232 if self._match_text_seq("WITH", "FILL"): 4233 with_fill = self.expression( 4234 exp.WithFill, 4235 **{ # type: ignore 4236 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4237 "to": 
self._match_text_seq("TO") and self._parse_bitwise(), 4238 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4239 "interpolate": self._parse_interpolate(), 4240 }, 4241 ) 4242 else: 4243 with_fill = None 4244 4245 return self.expression( 4246 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4247 ) 4248 4249 def _parse_limit( 4250 self, 4251 this: t.Optional[exp.Expression] = None, 4252 top: bool = False, 4253 skip_limit_token: bool = False, 4254 ) -> t.Optional[exp.Expression]: 4255 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4256 comments = self._prev_comments 4257 if top: 4258 limit_paren = self._match(TokenType.L_PAREN) 4259 expression = self._parse_term() if limit_paren else self._parse_number() 4260 4261 if limit_paren: 4262 self._match_r_paren() 4263 else: 4264 expression = self._parse_term() 4265 4266 if self._match(TokenType.COMMA): 4267 offset = expression 4268 expression = self._parse_term() 4269 else: 4270 offset = None 4271 4272 limit_exp = self.expression( 4273 exp.Limit, 4274 this=this, 4275 expression=expression, 4276 offset=offset, 4277 comments=comments, 4278 expressions=self._parse_limit_by(), 4279 ) 4280 4281 return limit_exp 4282 4283 if self._match(TokenType.FETCH): 4284 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4285 direction = self._prev.text.upper() if direction else "FIRST" 4286 4287 count = self._parse_field(tokens=self.FETCH_TOKENS) 4288 percent = self._match(TokenType.PERCENT) 4289 4290 self._match_set((TokenType.ROW, TokenType.ROWS)) 4291 4292 only = self._match_text_seq("ONLY") 4293 with_ties = self._match_text_seq("WITH", "TIES") 4294 4295 if only and with_ties: 4296 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4297 4298 return self.expression( 4299 exp.Fetch, 4300 direction=direction, 4301 count=count, 4302 percent=percent, 4303 with_ties=with_ties, 4304 ) 4305 4306 return this 4307 4308 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4309 if not self._match(TokenType.OFFSET): 4310 return this 4311 4312 count = self._parse_term() 4313 self._match_set((TokenType.ROW, TokenType.ROWS)) 4314 4315 return self.expression( 4316 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4317 ) 4318 4319 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4320 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4321 4322 def _parse_locks(self) -> t.List[exp.Lock]: 4323 locks = [] 4324 while True: 4325 if self._match_text_seq("FOR", "UPDATE"): 4326 update = True 4327 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4328 "LOCK", "IN", "SHARE", "MODE" 4329 ): 4330 update = False 4331 else: 4332 break 4333 4334 expressions = None 4335 if self._match_text_seq("OF"): 4336 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4337 4338 wait: t.Optional[bool | exp.Expression] = None 4339 if self._match_text_seq("NOWAIT"): 4340 wait = True 4341 elif self._match_text_seq("WAIT"): 4342 wait = self._parse_primary() 4343 elif self._match_text_seq("SKIP", "LOCKED"): 4344 wait = False 4345 4346 locks.append( 4347 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4348 ) 4349 4350 return locks 4351 4352 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4353 while this and self._match_set(self.SET_OPERATIONS): 4354 token_type = self._prev.token_type 
4355 4356 if token_type == TokenType.UNION: 4357 operation: t.Type[exp.SetOperation] = exp.Union 4358 elif token_type == TokenType.EXCEPT: 4359 operation = exp.Except 4360 else: 4361 operation = exp.Intersect 4362 4363 comments = self._prev.comments 4364 4365 if self._match(TokenType.DISTINCT): 4366 distinct: t.Optional[bool] = True 4367 elif self._match(TokenType.ALL): 4368 distinct = False 4369 else: 4370 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4371 if distinct is None: 4372 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4373 4374 by_name = self._match_text_seq("BY", "NAME") 4375 expression = self._parse_select(nested=True, parse_set_operation=False) 4376 4377 this = self.expression( 4378 operation, 4379 comments=comments, 4380 this=this, 4381 distinct=distinct, 4382 by_name=by_name, 4383 expression=expression, 4384 ) 4385 4386 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4387 expression = this.expression 4388 4389 if expression: 4390 for arg in self.SET_OP_MODIFIERS: 4391 expr = expression.args.get(arg) 4392 if expr: 4393 this.set(arg, expr.pop()) 4394 4395 return this 4396 4397 def _parse_expression(self) -> t.Optional[exp.Expression]: 4398 return self._parse_alias(self._parse_assignment()) 4399 4400 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4401 this = self._parse_disjunction() 4402 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4403 # This allows us to parse <non-identifier token> := <expr> 4404 this = exp.column( 4405 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4406 ) 4407 4408 while self._match_set(self.ASSIGNMENT): 4409 if isinstance(this, exp.Column) and len(this.parts) == 1: 4410 this = this.this 4411 4412 this = self.expression( 4413 self.ASSIGNMENT[self._prev.token_type], 4414 this=this, 4415 comments=self._prev_comments, 4416 expression=self._parse_assignment(), 4417 ) 4418 4419 return this 4420 4421 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4422 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4423 4424 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4425 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4426 4427 def _parse_equality(self) -> t.Optional[exp.Expression]: 4428 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4429 4430 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4431 return self._parse_tokens(self._parse_range, self.COMPARISON) 4432 4433 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4434 this = this or self._parse_bitwise() 4435 negate = self._match(TokenType.NOT) 4436 4437 if self._match_set(self.RANGE_PARSERS): 4438 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4439 if not expression: 4440 return this 4441 4442 this = expression 4443 elif self._match(TokenType.ISNULL): 4444 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4445 4446 # Postgres supports ISNULL and NOTNULL for conditions. 
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Collate):
            expr = this.expression

            # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
            # fall back to Identifier / Var
            if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                ident = expr.this
                if isinstance(ident, exp.Identifier):
                    this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # _parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>), e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
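            #
            # Illustration (not part of the original source; a minimal sketch using the
            # top-level sqlglot API, which this module does not import): the Cast branch
            # above is what BigQuery's inline constructor hits, e.g.
            #
            #     import sqlglot
            #     from sqlglot import exp
            #
            #     node = sqlglot.parse_one("STRUCT<a INT, b STRING>(1, 'foo')", read="bigquery")
            #     # should be CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>)
            #     assert isinstance(node, exp.Cast)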
4715 if data_type.expressions and index2 - index > 1: 4716 self._retreat(index2) 4717 return self._parse_column_ops(data_type) 4718 4719 self._retreat(index) 4720 4721 if fallback_to_identifier: 4722 return self._parse_id_var() 4723 4724 this = self._parse_column() 4725 return this and self._parse_column_ops(this) 4726 4727 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4728 this = self._parse_type() 4729 if not this: 4730 return None 4731 4732 if isinstance(this, exp.Column) and not this.table: 4733 this = exp.var(this.name.upper()) 4734 4735 return self.expression( 4736 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4737 ) 4738 4739 def _parse_types( 4740 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4741 ) -> t.Optional[exp.Expression]: 4742 index = self._index 4743 4744 this: t.Optional[exp.Expression] = None 4745 prefix = self._match_text_seq("SYSUDTLIB", ".") 4746 4747 if not self._match_set(self.TYPE_TOKENS): 4748 identifier = allow_identifiers and self._parse_id_var( 4749 any_token=False, tokens=(TokenType.VAR,) 4750 ) 4751 if isinstance(identifier, exp.Identifier): 4752 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4753 4754 if len(tokens) != 1: 4755 self.raise_error("Unexpected identifier", self._prev) 4756 4757 if tokens[0].token_type in self.TYPE_TOKENS: 4758 self._prev = tokens[0] 4759 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4760 type_name = identifier.name 4761 4762 while self._match(TokenType.DOT): 4763 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4764 4765 this = exp.DataType.build(type_name, udt=True) 4766 else: 4767 self._retreat(self._index - 1) 4768 return None 4769 else: 4770 return None 4771 4772 type_token = self._prev.token_type 4773 4774 if type_token == TokenType.PSEUDO_TYPE: 4775 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4776 4777 if type_token == TokenType.OBJECT_IDENTIFIER: 4778 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4779 4780 # https://materialize.com/docs/sql/types/map/ 4781 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4782 key_type = self._parse_types( 4783 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4784 ) 4785 if not self._match(TokenType.FARROW): 4786 self._retreat(index) 4787 return None 4788 4789 value_type = self._parse_types( 4790 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4791 ) 4792 if not self._match(TokenType.R_BRACKET): 4793 self._retreat(index) 4794 return None 4795 4796 return exp.DataType( 4797 this=exp.DataType.Type.MAP, 4798 expressions=[key_type, value_type], 4799 nested=True, 4800 prefix=prefix, 4801 ) 4802 4803 nested = type_token in self.NESTED_TYPE_TOKENS 4804 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4805 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4806 expressions = None 4807 maybe_func = False 4808 4809 if self._match(TokenType.L_PAREN): 4810 if is_struct: 4811 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4812 elif nested: 4813 expressions = self._parse_csv( 4814 lambda: self._parse_types( 4815 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4816 ) 4817 ) 4818 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4819 this = expressions[0] 4820 this.set("nullable", True) 4821 self._match_r_paren() 4822 return this 4823 elif type_token in self.ENUM_TYPE_TOKENS: 4824 
expressions = self._parse_csv(self._parse_equality) 4825 elif is_aggregate: 4826 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4827 any_token=False, tokens=(TokenType.VAR,) 4828 ) 4829 if not func_or_ident or not self._match(TokenType.COMMA): 4830 return None 4831 expressions = self._parse_csv( 4832 lambda: self._parse_types( 4833 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4834 ) 4835 ) 4836 expressions.insert(0, func_or_ident) 4837 else: 4838 expressions = self._parse_csv(self._parse_type_size) 4839 4840 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4841 if type_token == TokenType.VECTOR and len(expressions) == 2: 4842 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4843 4844 if not expressions or not self._match(TokenType.R_PAREN): 4845 self._retreat(index) 4846 return None 4847 4848 maybe_func = True 4849 4850 values: t.Optional[t.List[exp.Expression]] = None 4851 4852 if nested and self._match(TokenType.LT): 4853 if is_struct: 4854 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4855 else: 4856 expressions = self._parse_csv( 4857 lambda: self._parse_types( 4858 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4859 ) 4860 ) 4861 4862 if not self._match(TokenType.GT): 4863 self.raise_error("Expecting >") 4864 4865 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4866 values = self._parse_csv(self._parse_assignment) 4867 if not values and is_struct: 4868 values = None 4869 self._retreat(self._index - 1) 4870 else: 4871 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4872 4873 if type_token in self.TIMESTAMPS: 4874 if self._match_text_seq("WITH", "TIME", "ZONE"): 4875 maybe_func = False 4876 tz_type = ( 4877 exp.DataType.Type.TIMETZ 4878 if type_token in self.TIMES 4879 else exp.DataType.Type.TIMESTAMPTZ 4880 ) 4881 this = exp.DataType(this=tz_type, expressions=expressions) 4882 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4883 maybe_func = False 4884 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4885 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4886 maybe_func = False 4887 elif type_token == TokenType.INTERVAL: 4888 unit = self._parse_var(upper=True) 4889 if unit: 4890 if self._match_text_seq("TO"): 4891 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4892 4893 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4894 else: 4895 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4896 4897 if maybe_func and check_func: 4898 index2 = self._index 4899 peek = self._parse_string() 4900 4901 if not peek: 4902 self._retreat(index) 4903 return None 4904 4905 self._retreat(index2) 4906 4907 if not this: 4908 if self._match_text_seq("UNSIGNED"): 4909 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4910 if not unsigned_type_token: 4911 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4912 4913 type_token = unsigned_type_token or type_token 4914 4915 this = exp.DataType( 4916 this=exp.DataType.Type[type_token.value], 4917 expressions=expressions, 4918 nested=nested, 4919 prefix=prefix, 4920 ) 4921 4922 # Empty arrays/structs are allowed 4923 if values is not None: 4924 cls = exp.Struct if is_struct else exp.Array 4925 this = exp.cast(cls(expressions=values), this, copy=False) 4926 4927 elif expressions: 4928 this.set("expressions", 
expressions) 4929 4930 # https://materialize.com/docs/sql/types/list/#type-name 4931 while self._match(TokenType.LIST): 4932 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4933 4934 index = self._index 4935 4936 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4937 matched_array = self._match(TokenType.ARRAY) 4938 4939 while self._curr: 4940 datatype_token = self._prev.token_type 4941 matched_l_bracket = self._match(TokenType.L_BRACKET) 4942 if not matched_l_bracket and not matched_array: 4943 break 4944 4945 matched_array = False 4946 values = self._parse_csv(self._parse_assignment) or None 4947 if ( 4948 values 4949 and not schema 4950 and ( 4951 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4952 ) 4953 ): 4954 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4955 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4956 self._retreat(index) 4957 break 4958 4959 this = exp.DataType( 4960 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4961 ) 4962 self._match(TokenType.R_BRACKET) 4963 4964 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4965 converter = self.TYPE_CONVERTERS.get(this.this) 4966 if converter: 4967 this = converter(t.cast(exp.DataType, this)) 4968 4969 return this 4970 4971 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4972 index = self._index 4973 4974 if ( 4975 self._curr 4976 and self._next 4977 and self._curr.token_type in self.TYPE_TOKENS 4978 and self._next.token_type in self.TYPE_TOKENS 4979 ): 4980 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4981 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4982 this = self._parse_id_var() 4983 else: 4984 this = ( 4985 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4986 or self._parse_id_var() 4987 ) 4988 4989 self._match(TokenType.COLON) 4990 4991 if ( 4992 type_required 4993 and not isinstance(this, exp.DataType) 4994 and not self._match_set(self.TYPE_TOKENS, advance=False) 4995 ): 4996 self._retreat(index) 4997 return self._parse_types() 4998 4999 return self._parse_column_def(this) 5000 5001 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5002 if not self._match_text_seq("AT", "TIME", "ZONE"): 5003 return this 5004 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5005 5006 def _parse_column(self) -> t.Optional[exp.Expression]: 5007 this = self._parse_column_reference() 5008 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5009 5010 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5011 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5012 5013 return column 5014 5015 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5016 this = self._parse_field() 5017 if ( 5018 not this 5019 and self._match(TokenType.VALUES, advance=False) 5020 and self.VALUES_FOLLOWED_BY_PAREN 5021 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5022 ): 5023 this = self._parse_id_var() 5024 5025 if isinstance(this, exp.Identifier): 5026 # We bubble up comments from the Identifier to the Column 5027 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5028 5029 return this 5030 5031 def _parse_colon_as_variant_extract( 5032 self, this: t.Optional[exp.Expression] 5033 ) -> t.Optional[exp.Expression]: 5034 casts = [] 5035 json_path = [] 5036 escape = None 5037 5038 while self._match(TokenType.COLON): 5039 start_index = self._index 5040 5041 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5042 path = self._parse_column_ops( 5043 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5044 ) 5045 5046 # The cast :: operator has a lower precedence than the extraction operator :, so 5047 # we rearrange the AST appropriately to avoid casting the JSON path 5048 while isinstance(path, exp.Cast): 5049 casts.append(path.to) 5050 path = path.this 5051 5052 if casts: 5053 dcolon_offset = next( 5054 i 5055 for i, t in enumerate(self._tokens[start_index:]) 5056 if t.token_type == TokenType.DCOLON 5057 ) 5058 end_token = self._tokens[start_index + dcolon_offset - 1] 5059 else: 5060 end_token = self._prev 5061 5062 if path: 5063 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5064 # it'll roundtrip to a string literal in GET_PATH 5065 if isinstance(path, exp.Identifier) and path.quoted: 5066 escape = True 5067 5068 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5069 5070 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5071 # Databricks transforms it back to the colon/dot notation 5072 if json_path: 5073 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5074 5075 if json_path_expr: 5076 json_path_expr.set("escape", escape) 5077 5078 this = self.expression( 5079 exp.JSONExtract, 5080 this=this, 5081 expression=json_path_expr, 5082 variant_extract=True, 5083 ) 5084 5085 while casts: 5086 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5087 5088 return this 5089 5090 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5091 return self._parse_types() 5092 5093 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5094 this = self._parse_bracket(this) 5095 5096 while self._match_set(self.COLUMN_OPERATORS): 5097 op_token = self._prev.token_type 5098 op = self.COLUMN_OPERATORS.get(op_token) 5099 5100 if op_token == TokenType.DCOLON: 5101 field = self._parse_dcolon() 5102 if not field: 5103 self.raise_error("Expected type") 5104 elif op and self._curr: 5105 field = self._parse_column_reference() or self._parse_bracket() 5106 else: 5107 field = self._parse_field(any_token=True, anonymous_func=True) 5108 5109 if isinstance(field, exp.Func) and this: 5110 # bigquery allows function calls like x.y.count(...) 5111 # SAFE.SUBSTR(...) 5112 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5113 this = exp.replace_tree( 5114 this, 5115 lambda n: ( 5116 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5117 if n.table 5118 else n.this 5119 ) 5120 if isinstance(n, exp.Column) 5121 else n, 5122 ) 5123 5124 if op: 5125 this = op(self, this, field) 5126 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5127 this = self.expression( 5128 exp.Column, 5129 comments=this.comments, 5130 this=field, 5131 table=this.this, 5132 db=this.args.get("table"), 5133 catalog=this.args.get("db"), 5134 ) 5135 else: 5136 this = self.expression(exp.Dot, this=this, expression=field) 5137 5138 this = self._parse_bracket(this) 5139 5140 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5141 5142 def _parse_primary(self) -> t.Optional[exp.Expression]: 5143 if self._match_set(self.PRIMARY_PARSERS): 5144 token_type = self._prev.token_type 5145 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5146 5147 if token_type == TokenType.STRING: 5148 expressions = [primary] 5149 while self._match(TokenType.STRING): 5150 expressions.append(exp.Literal.string(self._prev.text)) 5151 5152 if len(expressions) > 1: 5153 return self.expression(exp.Concat, expressions=expressions) 5154 5155 return primary 5156 5157 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5158 return exp.Literal.number(f"0.{self._prev.text}") 5159 5160 if self._match(TokenType.L_PAREN): 5161 comments = self._prev_comments 5162 query = self._parse_select() 5163 5164 if query: 5165 expressions = [query] 5166 else: 5167 expressions = self._parse_expressions() 5168 5169 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5170 5171 if not this and self._match(TokenType.R_PAREN, advance=False): 5172 this 
= self.expression(exp.Tuple) 5173 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5174 this = self._parse_subquery(this=this, parse_alias=False) 5175 elif isinstance(this, exp.Subquery): 5176 this = self._parse_subquery( 5177 this=self._parse_set_operations(this), parse_alias=False 5178 ) 5179 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5180 this = self.expression(exp.Tuple, expressions=expressions) 5181 else: 5182 this = self.expression(exp.Paren, this=this) 5183 5184 if this: 5185 this.add_comments(comments) 5186 5187 self._match_r_paren(expression=this) 5188 return this 5189 5190 return None 5191 5192 def _parse_field( 5193 self, 5194 any_token: bool = False, 5195 tokens: t.Optional[t.Collection[TokenType]] = None, 5196 anonymous_func: bool = False, 5197 ) -> t.Optional[exp.Expression]: 5198 if anonymous_func: 5199 field = ( 5200 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5201 or self._parse_primary() 5202 ) 5203 else: 5204 field = self._parse_primary() or self._parse_function( 5205 anonymous=anonymous_func, any_token=any_token 5206 ) 5207 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5208 5209 def _parse_function( 5210 self, 5211 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5212 anonymous: bool = False, 5213 optional_parens: bool = True, 5214 any_token: bool = False, 5215 ) -> t.Optional[exp.Expression]: 5216 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5217 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5218 fn_syntax = False 5219 if ( 5220 self._match(TokenType.L_BRACE, advance=False) 5221 and self._next 5222 and self._next.text.upper() == "FN" 5223 ): 5224 self._advance(2) 5225 fn_syntax = True 5226 5227 func = self._parse_function_call( 5228 functions=functions, 5229 anonymous=anonymous, 5230 optional_parens=optional_parens, 5231 any_token=any_token, 5232 ) 5233 5234 if fn_syntax: 5235 self._match(TokenType.R_BRACE) 5236 5237 return func 5238 5239 def _parse_function_call( 5240 self, 5241 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5242 anonymous: bool = False, 5243 optional_parens: bool = True, 5244 any_token: bool = False, 5245 ) -> t.Optional[exp.Expression]: 5246 if not self._curr: 5247 return None 5248 5249 comments = self._curr.comments 5250 token_type = self._curr.token_type 5251 this = self._curr.text 5252 upper = this.upper() 5253 5254 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5255 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5256 self._advance() 5257 return self._parse_window(parser(self)) 5258 5259 if not self._next or self._next.token_type != TokenType.L_PAREN: 5260 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5261 self._advance() 5262 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5263 5264 return None 5265 5266 if any_token: 5267 if token_type in self.RESERVED_TOKENS: 5268 return None 5269 elif token_type not in self.FUNC_TOKENS: 5270 return None 5271 5272 self._advance(2) 5273 5274 parser = self.FUNCTION_PARSERS.get(upper) 5275 if parser and not anonymous: 5276 this = parser(self) 5277 else: 5278 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5279 5280 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5281 this = self.expression( 5282 subquery_predicate, comments=comments, this=self._parse_select() 5283 ) 5284 self._match_r_paren() 5285 return this 5286 5287 if functions is None: 
5288 functions = self.FUNCTIONS 5289 5290 function = functions.get(upper) 5291 5292 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5293 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5294 5295 if alias: 5296 args = self._kv_to_prop_eq(args) 5297 5298 if function and not anonymous: 5299 if "dialect" in function.__code__.co_varnames: 5300 func = function(args, dialect=self.dialect) 5301 else: 5302 func = function(args) 5303 5304 func = self.validate_expression(func, args) 5305 if not self.dialect.NORMALIZE_FUNCTIONS: 5306 func.meta["name"] = this 5307 5308 this = func 5309 else: 5310 if token_type == TokenType.IDENTIFIER: 5311 this = exp.Identifier(this=this, quoted=True) 5312 this = self.expression(exp.Anonymous, this=this, expressions=args) 5313 5314 if isinstance(this, exp.Expression): 5315 this.add_comments(comments) 5316 5317 self._match_r_paren(this) 5318 return self._parse_window(this) 5319 5320 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5321 return expression 5322 5323 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5324 transformed = [] 5325 5326 for index, e in enumerate(expressions): 5327 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5328 if isinstance(e, exp.Alias): 5329 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5330 5331 if not isinstance(e, exp.PropertyEQ): 5332 e = self.expression( 5333 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5334 ) 5335 5336 if isinstance(e.this, exp.Column): 5337 e.this.replace(e.this.this) 5338 else: 5339 e = self._to_prop_eq(e, index) 5340 5341 transformed.append(e) 5342 5343 return transformed 5344 5345 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5346 return self._parse_column_def(self._parse_id_var()) 5347 5348 def _parse_user_defined_function( 5349 self, kind: t.Optional[TokenType] = None 5350 ) -> t.Optional[exp.Expression]: 5351 this = self._parse_id_var() 5352 5353 while self._match(TokenType.DOT): 5354 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5355 5356 if not self._match(TokenType.L_PAREN): 5357 return this 5358 5359 expressions = self._parse_csv(self._parse_function_parameter) 5360 self._match_r_paren() 5361 return self.expression( 5362 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5363 ) 5364 5365 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5366 literal = self._parse_primary() 5367 if literal: 5368 return self.expression(exp.Introducer, this=token.text, expression=literal) 5369 5370 return self.expression(exp.Identifier, this=token.text) 5371 5372 def _parse_session_parameter(self) -> exp.SessionParameter: 5373 kind = None 5374 this = self._parse_id_var() or self._parse_primary() 5375 5376 if this and self._match(TokenType.DOT): 5377 kind = this.name 5378 this = self._parse_var() or self._parse_primary() 5379 5380 return self.expression(exp.SessionParameter, this=this, kind=kind) 5381 5382 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5383 return self._parse_id_var() 5384 5385 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5386 index = self._index 5387 5388 if self._match(TokenType.L_PAREN): 5389 expressions = t.cast( 5390 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5391 ) 5392 5393 if not self._match(TokenType.R_PAREN): 5394 self._retreat(index) 5395 else: 5396 expressions = 
[self._parse_lambda_arg()] 5397 5398 if self._match_set(self.LAMBDAS): 5399 return self.LAMBDAS[self._prev.token_type](self, expressions) 5400 5401 self._retreat(index) 5402 5403 this: t.Optional[exp.Expression] 5404 5405 if self._match(TokenType.DISTINCT): 5406 this = self.expression( 5407 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5408 ) 5409 else: 5410 this = self._parse_select_or_expression(alias=alias) 5411 5412 return self._parse_limit( 5413 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5414 ) 5415 5416 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5417 index = self._index 5418 if not self._match(TokenType.L_PAREN): 5419 return this 5420 5421 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5422 # expr can be of both types 5423 if self._match_set(self.SELECT_START_TOKENS): 5424 self._retreat(index) 5425 return this 5426 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5427 self._match_r_paren() 5428 return self.expression(exp.Schema, this=this, expressions=args) 5429 5430 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5431 return self._parse_column_def(self._parse_field(any_token=True)) 5432 5433 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5434 # column defs are not really columns, they're identifiers 5435 if isinstance(this, exp.Column): 5436 this = this.this 5437 5438 kind = self._parse_types(schema=True) 5439 5440 if self._match_text_seq("FOR", "ORDINALITY"): 5441 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5442 5443 constraints: t.List[exp.Expression] = [] 5444 5445 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5446 ("ALIAS", "MATERIALIZED") 5447 ): 5448 persisted = self._prev.text.upper() == "MATERIALIZED" 5449 constraint_kind = exp.ComputedColumnConstraint( 5450 this=self._parse_assignment(), 5451 persisted=persisted or self._match_text_seq("PERSISTED"), 5452 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5453 ) 5454 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5455 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5456 self._match(TokenType.ALIAS) 5457 constraints.append( 5458 self.expression( 5459 exp.ColumnConstraint, 5460 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5461 ) 5462 ) 5463 5464 while True: 5465 constraint = self._parse_column_constraint() 5466 if not constraint: 5467 break 5468 constraints.append(constraint) 5469 5470 if not kind and not constraints: 5471 return this 5472 5473 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5474 5475 def _parse_auto_increment( 5476 self, 5477 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5478 start = None 5479 increment = None 5480 5481 if self._match(TokenType.L_PAREN, advance=False): 5482 args = self._parse_wrapped_csv(self._parse_bitwise) 5483 start = seq_get(args, 0) 5484 increment = seq_get(args, 1) 5485 elif self._match_text_seq("START"): 5486 start = self._parse_bitwise() 5487 self._match_text_seq("INCREMENT") 5488 increment = self._parse_bitwise() 5489 5490 if start and increment: 5491 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5492 5493 return exp.AutoIncrementColumnConstraint() 5494 5495 def _parse_auto_property(self) -> 
t.Optional[exp.AutoRefreshProperty]: 5496 if not self._match_text_seq("REFRESH"): 5497 self._retreat(self._index - 1) 5498 return None 5499 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5500 5501 def _parse_compress(self) -> exp.CompressColumnConstraint: 5502 if self._match(TokenType.L_PAREN, advance=False): 5503 return self.expression( 5504 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5505 ) 5506 5507 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5508 5509 def _parse_generated_as_identity( 5510 self, 5511 ) -> ( 5512 exp.GeneratedAsIdentityColumnConstraint 5513 | exp.ComputedColumnConstraint 5514 | exp.GeneratedAsRowColumnConstraint 5515 ): 5516 if self._match_text_seq("BY", "DEFAULT"): 5517 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5518 this = self.expression( 5519 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5520 ) 5521 else: 5522 self._match_text_seq("ALWAYS") 5523 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5524 5525 self._match(TokenType.ALIAS) 5526 5527 if self._match_text_seq("ROW"): 5528 start = self._match_text_seq("START") 5529 if not start: 5530 self._match(TokenType.END) 5531 hidden = self._match_text_seq("HIDDEN") 5532 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5533 5534 identity = self._match_text_seq("IDENTITY") 5535 5536 if self._match(TokenType.L_PAREN): 5537 if self._match(TokenType.START_WITH): 5538 this.set("start", self._parse_bitwise()) 5539 if self._match_text_seq("INCREMENT", "BY"): 5540 this.set("increment", self._parse_bitwise()) 5541 if self._match_text_seq("MINVALUE"): 5542 this.set("minvalue", self._parse_bitwise()) 5543 if self._match_text_seq("MAXVALUE"): 5544 this.set("maxvalue", self._parse_bitwise()) 5545 5546 if self._match_text_seq("CYCLE"): 5547 this.set("cycle", True) 5548 elif self._match_text_seq("NO", "CYCLE"): 5549 this.set("cycle", False) 5550 5551 if not identity: 5552 this.set("expression", self._parse_range()) 5553 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5554 args = self._parse_csv(self._parse_bitwise) 5555 this.set("start", seq_get(args, 0)) 5556 this.set("increment", seq_get(args, 1)) 5557 5558 self._match_r_paren() 5559 5560 return this 5561 5562 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5563 self._match_text_seq("LENGTH") 5564 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5565 5566 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5567 if self._match_text_seq("NULL"): 5568 return self.expression(exp.NotNullColumnConstraint) 5569 if self._match_text_seq("CASESPECIFIC"): 5570 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5571 if self._match_text_seq("FOR", "REPLICATION"): 5572 return self.expression(exp.NotForReplicationColumnConstraint) 5573 5574 # Unconsume the `NOT` token 5575 self._retreat(self._index - 1) 5576 return None 5577 5578 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5579 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5580 5581 procedure_option_follows = ( 5582 self._match(TokenType.WITH, advance=False) 5583 and self._next 5584 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5585 ) 5586 5587 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5588 return self.expression( 5589 exp.ColumnConstraint, 5590 
this=this, 5591 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5592 ) 5593 5594 return this 5595 5596 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5597 if not self._match(TokenType.CONSTRAINT): 5598 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5599 5600 return self.expression( 5601 exp.Constraint, 5602 this=self._parse_id_var(), 5603 expressions=self._parse_unnamed_constraints(), 5604 ) 5605 5606 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5607 constraints = [] 5608 while True: 5609 constraint = self._parse_unnamed_constraint() or self._parse_function() 5610 if not constraint: 5611 break 5612 constraints.append(constraint) 5613 5614 return constraints 5615 5616 def _parse_unnamed_constraint( 5617 self, constraints: t.Optional[t.Collection[str]] = None 5618 ) -> t.Optional[exp.Expression]: 5619 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5620 constraints or self.CONSTRAINT_PARSERS 5621 ): 5622 return None 5623 5624 constraint = self._prev.text.upper() 5625 if constraint not in self.CONSTRAINT_PARSERS: 5626 self.raise_error(f"No parser found for schema constraint {constraint}.") 5627 5628 return self.CONSTRAINT_PARSERS[constraint](self) 5629 5630 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5631 return self._parse_id_var(any_token=False) 5632 5633 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5634 self._match_text_seq("KEY") 5635 return self.expression( 5636 exp.UniqueColumnConstraint, 5637 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5638 this=self._parse_schema(self._parse_unique_key()), 5639 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5640 on_conflict=self._parse_on_conflict(), 5641 ) 5642 5643 def _parse_key_constraint_options(self) -> t.List[str]: 5644 options = [] 5645 while True: 5646 if not self._curr: 5647 break 5648 5649 if self._match(TokenType.ON): 5650 action = None 5651 on = self._advance_any() and self._prev.text 5652 5653 if self._match_text_seq("NO", "ACTION"): 5654 action = "NO ACTION" 5655 elif self._match_text_seq("CASCADE"): 5656 action = "CASCADE" 5657 elif self._match_text_seq("RESTRICT"): 5658 action = "RESTRICT" 5659 elif self._match_pair(TokenType.SET, TokenType.NULL): 5660 action = "SET NULL" 5661 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5662 action = "SET DEFAULT" 5663 else: 5664 self.raise_error("Invalid key constraint") 5665 5666 options.append(f"ON {on} {action}") 5667 else: 5668 var = self._parse_var_from_options( 5669 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5670 ) 5671 if not var: 5672 break 5673 options.append(var.name) 5674 5675 return options 5676 5677 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5678 if match and not self._match(TokenType.REFERENCES): 5679 return None 5680 5681 expressions = None 5682 this = self._parse_table(schema=True) 5683 options = self._parse_key_constraint_options() 5684 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5685 5686 def _parse_foreign_key(self) -> exp.ForeignKey: 5687 expressions = self._parse_wrapped_id_vars() 5688 reference = self._parse_references() 5689 options = {} 5690 5691 while self._match(TokenType.ON): 5692 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5693 self.raise_error("Expected DELETE or UPDATE") 5694 5695 kind = self._prev.text.lower() 5696 5697 if self._match_text_seq("NO", "ACTION"): 5698 
action = "NO ACTION" 5699 elif self._match(TokenType.SET): 5700 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5701 action = "SET " + self._prev.text.upper() 5702 else: 5703 self._advance() 5704 action = self._prev.text.upper() 5705 5706 options[kind] = action 5707 5708 return self.expression( 5709 exp.ForeignKey, 5710 expressions=expressions, 5711 reference=reference, 5712 **options, # type: ignore 5713 ) 5714 5715 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5716 return self._parse_field() 5717 5718 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5719 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5720 self._retreat(self._index - 1) 5721 return None 5722 5723 id_vars = self._parse_wrapped_id_vars() 5724 return self.expression( 5725 exp.PeriodForSystemTimeConstraint, 5726 this=seq_get(id_vars, 0), 5727 expression=seq_get(id_vars, 1), 5728 ) 5729 5730 def _parse_primary_key( 5731 self, wrapped_optional: bool = False, in_props: bool = False 5732 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5733 desc = ( 5734 self._match_set((TokenType.ASC, TokenType.DESC)) 5735 and self._prev.token_type == TokenType.DESC 5736 ) 5737 5738 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5739 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5740 5741 expressions = self._parse_wrapped_csv( 5742 self._parse_primary_key_part, optional=wrapped_optional 5743 ) 5744 options = self._parse_key_constraint_options() 5745 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5746 5747 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5748 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5749 5750 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5751 """ 5752 Parses a datetime column in ODBC format. We parse the column into the corresponding 5753 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5754 same as we did for `DATE('yyyy-mm-dd')`. 
5755 5756 Reference: 5757 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5758 """ 5759 self._match(TokenType.VAR) 5760 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5761 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5762 if not self._match(TokenType.R_BRACE): 5763 self.raise_error("Expected }") 5764 return expression 5765 5766 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5767 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5768 return this 5769 5770 bracket_kind = self._prev.token_type 5771 if ( 5772 bracket_kind == TokenType.L_BRACE 5773 and self._curr 5774 and self._curr.token_type == TokenType.VAR 5775 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5776 ): 5777 return self._parse_odbc_datetime_literal() 5778 5779 expressions = self._parse_csv( 5780 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5781 ) 5782 5783 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5784 self.raise_error("Expected ]") 5785 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5786 self.raise_error("Expected }") 5787 5788 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5789 if bracket_kind == TokenType.L_BRACE: 5790 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5791 elif not this: 5792 this = build_array_constructor( 5793 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5794 ) 5795 else: 5796 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5797 if constructor_type: 5798 return build_array_constructor( 5799 constructor_type, 5800 args=expressions, 5801 bracket_kind=bracket_kind, 5802 dialect=self.dialect, 5803 ) 5804 5805 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5806 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5807 5808 self._add_comments(this) 5809 return self._parse_bracket(this) 5810 5811 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5812 if self._match(TokenType.COLON): 5813 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5814 return this 5815 5816 def _parse_case(self) -> t.Optional[exp.Expression]: 5817 ifs = [] 5818 default = None 5819 5820 comments = self._prev_comments 5821 expression = self._parse_assignment() 5822 5823 while self._match(TokenType.WHEN): 5824 this = self._parse_assignment() 5825 self._match(TokenType.THEN) 5826 then = self._parse_assignment() 5827 ifs.append(self.expression(exp.If, this=this, true=then)) 5828 5829 if self._match(TokenType.ELSE): 5830 default = self._parse_assignment() 5831 5832 if not self._match(TokenType.END): 5833 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5834 default = exp.column("interval") 5835 else: 5836 self.raise_error("Expected END after CASE", self._prev) 5837 5838 return self.expression( 5839 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5840 ) 5841 5842 def _parse_if(self) -> t.Optional[exp.Expression]: 5843 if self._match(TokenType.L_PAREN): 5844 args = self._parse_csv(self._parse_assignment) 5845 this = self.validate_expression(exp.If.from_arg_list(args), args) 5846 self._match_r_paren() 5847 else: 5848 index = self._index - 1 5849 5850 if self.NO_PAREN_IF_COMMANDS and index == 0: 5851 
return self._parse_as_command(self._prev) 5852 5853 condition = self._parse_assignment() 5854 5855 if not condition: 5856 self._retreat(index) 5857 return None 5858 5859 self._match(TokenType.THEN) 5860 true = self._parse_assignment() 5861 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5862 self._match(TokenType.END) 5863 this = self.expression(exp.If, this=condition, true=true, false=false) 5864 5865 return this 5866 5867 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5868 if not self._match_text_seq("VALUE", "FOR"): 5869 self._retreat(self._index - 1) 5870 return None 5871 5872 return self.expression( 5873 exp.NextValueFor, 5874 this=self._parse_column(), 5875 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5876 ) 5877 5878 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5879 this = self._parse_function() or self._parse_var_or_string(upper=True) 5880 5881 if self._match(TokenType.FROM): 5882 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5883 5884 if not self._match(TokenType.COMMA): 5885 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5886 5887 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5888 5889 def _parse_gap_fill(self) -> exp.GapFill: 5890 self._match(TokenType.TABLE) 5891 this = self._parse_table() 5892 5893 self._match(TokenType.COMMA) 5894 args = [this, *self._parse_csv(self._parse_lambda)] 5895 5896 gap_fill = exp.GapFill.from_arg_list(args) 5897 return self.validate_expression(gap_fill, args) 5898 5899 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5900 this = self._parse_assignment() 5901 5902 if not self._match(TokenType.ALIAS): 5903 if self._match(TokenType.COMMA): 5904 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5905 5906 self.raise_error("Expected AS after CAST") 5907 5908 fmt = None 5909 to = self._parse_types() 5910 5911 if self._match(TokenType.FORMAT): 5912 fmt_string = self._parse_string() 5913 fmt = self._parse_at_time_zone(fmt_string) 5914 5915 if not to: 5916 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5917 if to.this in exp.DataType.TEMPORAL_TYPES: 5918 this = self.expression( 5919 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5920 this=this, 5921 format=exp.Literal.string( 5922 format_time( 5923 fmt_string.this if fmt_string else "", 5924 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5925 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5926 ) 5927 ), 5928 safe=safe, 5929 ) 5930 5931 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5932 this.set("zone", fmt.args["zone"]) 5933 return this 5934 elif not to: 5935 self.raise_error("Expected TYPE after CAST") 5936 elif isinstance(to, exp.Identifier): 5937 to = exp.DataType.build(to.name, udt=True) 5938 elif to.this == exp.DataType.Type.CHAR: 5939 if self._match(TokenType.CHARACTER_SET): 5940 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5941 5942 return self.expression( 5943 exp.Cast if strict else exp.TryCast, 5944 this=this, 5945 to=to, 5946 format=fmt, 5947 safe=safe, 5948 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5949 ) 5950 5951 def _parse_string_agg(self) -> exp.Expression: 5952 if self._match(TokenType.DISTINCT): 5953 args: t.List[t.Optional[exp.Expression]] = [ 5954 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
5955 ] 5956 if self._match(TokenType.COMMA): 5957 args.extend(self._parse_csv(self._parse_assignment)) 5958 else: 5959 args = self._parse_csv(self._parse_assignment) # type: ignore 5960 5961 index = self._index 5962 if not self._match(TokenType.R_PAREN) and args: 5963 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5964 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5965 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5966 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5967 5968 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5969 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5970 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5971 if not self._match_text_seq("WITHIN", "GROUP"): 5972 self._retreat(index) 5973 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5974 5975 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5976 order = self._parse_order(this=seq_get(args, 0)) 5977 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5978 5979 def _parse_convert( 5980 self, strict: bool, safe: t.Optional[bool] = None 5981 ) -> t.Optional[exp.Expression]: 5982 this = self._parse_bitwise() 5983 5984 if self._match(TokenType.USING): 5985 to: t.Optional[exp.Expression] = self.expression( 5986 exp.CharacterSet, this=self._parse_var() 5987 ) 5988 elif self._match(TokenType.COMMA): 5989 to = self._parse_types() 5990 else: 5991 to = None 5992 5993 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5994 5995 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5996 """ 5997 There are generally two variants of the DECODE function: 5998 5999 - DECODE(bin, charset) 6000 - DECODE(expression, search, result [, search, result] ... [, default]) 6001 6002 The second variant will always be parsed into a CASE expression. Note that NULL 6003 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6004 instead of relying on pattern matching. 
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
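    # Illustration (not part of the original source; a minimal sketch using the
    # top-level sqlglot API): the NULL-aware DECODE expansion in _parse_decode above
    # means that, for example,
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT DECODE(a, 1, 'one', NULL, 'none', 'x') FROM t").sql()
    #
    # should round-trip to a CASE expression along the lines of
    # "SELECT CASE WHEN a = 1 THEN 'one' WHEN a IS NULL THEN 'none' ELSE 'x' END FROM t".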
6103 6104 def _parse_json_object(self, agg=False): 6105 star = self._parse_star() 6106 expressions = ( 6107 [star] 6108 if star 6109 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6110 ) 6111 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6112 6113 unique_keys = None 6114 if self._match_text_seq("WITH", "UNIQUE"): 6115 unique_keys = True 6116 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6117 unique_keys = False 6118 6119 self._match_text_seq("KEYS") 6120 6121 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6122 self._parse_type() 6123 ) 6124 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6125 6126 return self.expression( 6127 exp.JSONObjectAgg if agg else exp.JSONObject, 6128 expressions=expressions, 6129 null_handling=null_handling, 6130 unique_keys=unique_keys, 6131 return_type=return_type, 6132 encoding=encoding, 6133 ) 6134 6135 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6136 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6137 if not self._match_text_seq("NESTED"): 6138 this = self._parse_id_var() 6139 kind = self._parse_types(allow_identifiers=False) 6140 nested = None 6141 else: 6142 this = None 6143 kind = None 6144 nested = True 6145 6146 path = self._match_text_seq("PATH") and self._parse_string() 6147 nested_schema = nested and self._parse_json_schema() 6148 6149 return self.expression( 6150 exp.JSONColumnDef, 6151 this=this, 6152 kind=kind, 6153 path=path, 6154 nested_schema=nested_schema, 6155 ) 6156 6157 def _parse_json_schema(self) -> exp.JSONSchema: 6158 self._match_text_seq("COLUMNS") 6159 return self.expression( 6160 exp.JSONSchema, 6161 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6162 ) 6163 6164 def _parse_json_table(self) -> exp.JSONTable: 6165 this = self._parse_format_json(self._parse_bitwise()) 6166 path = self._match(TokenType.COMMA) and self._parse_string() 6167 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6168 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6169 schema = self._parse_json_schema() 6170 6171 return exp.JSONTable( 6172 this=this, 6173 schema=schema, 6174 path=path, 6175 error_handling=error_handling, 6176 empty_handling=empty_handling, 6177 ) 6178 6179 def _parse_match_against(self) -> exp.MatchAgainst: 6180 expressions = self._parse_csv(self._parse_column) 6181 6182 self._match_text_seq(")", "AGAINST", "(") 6183 6184 this = self._parse_string() 6185 6186 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6187 modifier = "IN NATURAL LANGUAGE MODE" 6188 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6189 modifier = f"{modifier} WITH QUERY EXPANSION" 6190 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6191 modifier = "IN BOOLEAN MODE" 6192 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6193 modifier = "WITH QUERY EXPANSION" 6194 else: 6195 modifier = None 6196 6197 return self.expression( 6198 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6199 ) 6200 6201 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6202 def _parse_open_json(self) -> exp.OpenJSON: 6203 this = self._parse_bitwise() 6204 path = self._match(TokenType.COMMA) and self._parse_string() 6205 6206 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6207 this = self._parse_field(any_token=True) 6208 kind = self._parse_types() 6209 path = 
self._parse_string() 6210 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6211 6212 return self.expression( 6213 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6214 ) 6215 6216 expressions = None 6217 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6218 self._match_l_paren() 6219 expressions = self._parse_csv(_parse_open_json_column_def) 6220 6221 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6222 6223 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6224 args = self._parse_csv(self._parse_bitwise) 6225 6226 if self._match(TokenType.IN): 6227 return self.expression( 6228 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6229 ) 6230 6231 if haystack_first: 6232 haystack = seq_get(args, 0) 6233 needle = seq_get(args, 1) 6234 else: 6235 needle = seq_get(args, 0) 6236 haystack = seq_get(args, 1) 6237 6238 return self.expression( 6239 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6240 ) 6241 6242 def _parse_predict(self) -> exp.Predict: 6243 self._match_text_seq("MODEL") 6244 this = self._parse_table() 6245 6246 self._match(TokenType.COMMA) 6247 self._match_text_seq("TABLE") 6248 6249 return self.expression( 6250 exp.Predict, 6251 this=this, 6252 expression=self._parse_table(), 6253 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6254 ) 6255 6256 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6257 args = self._parse_csv(self._parse_table) 6258 return exp.JoinHint(this=func_name.upper(), expressions=args) 6259 6260 def _parse_substring(self) -> exp.Substring: 6261 # Postgres supports the form: substring(string [from int] [for int]) 6262 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6263 6264 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6265 6266 if self._match(TokenType.FROM): 6267 args.append(self._parse_bitwise()) 6268 if self._match(TokenType.FOR): 6269 if len(args) == 1: 6270 args.append(exp.Literal.number(1)) 6271 args.append(self._parse_bitwise()) 6272 6273 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6274 6275 def _parse_trim(self) -> exp.Trim: 6276 # https://www.w3resource.com/sql/character-functions/trim.php 6277 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6278 6279 position = None 6280 collation = None 6281 expression = None 6282 6283 if self._match_texts(self.TRIM_TYPES): 6284 position = self._prev.text.upper() 6285 6286 this = self._parse_bitwise() 6287 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6288 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6289 expression = self._parse_bitwise() 6290 6291 if invert_order: 6292 this, expression = expression, this 6293 6294 if self._match(TokenType.COLLATE): 6295 collation = self._parse_bitwise() 6296 6297 return self.expression( 6298 exp.Trim, this=this, position=position, expression=expression, collation=collation 6299 ) 6300 6301 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6302 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6303 6304 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6305 return self._parse_window(self._parse_id_var(), alias=True) 6306 6307 def _parse_respect_or_ignore_nulls( 6308 self, this: t.Optional[exp.Expression] 6309 ) -> t.Optional[exp.Expression]: 6310 if self._match_text_seq("IGNORE", "NULLS"): 
6311 return self.expression(exp.IgnoreNulls, this=this) 6312 if self._match_text_seq("RESPECT", "NULLS"): 6313 return self.expression(exp.RespectNulls, this=this) 6314 return this 6315 6316 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6317 if self._match(TokenType.HAVING): 6318 self._match_texts(("MAX", "MIN")) 6319 max = self._prev.text.upper() != "MIN" 6320 return self.expression( 6321 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6322 ) 6323 6324 return this 6325 6326 def _parse_window( 6327 self, this: t.Optional[exp.Expression], alias: bool = False 6328 ) -> t.Optional[exp.Expression]: 6329 func = this 6330 comments = func.comments if isinstance(func, exp.Expression) else None 6331 6332 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6333 self._match(TokenType.WHERE) 6334 this = self.expression( 6335 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6336 ) 6337 self._match_r_paren() 6338 6339 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6340 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6341 if self._match_text_seq("WITHIN", "GROUP"): 6342 order = self._parse_wrapped(self._parse_order) 6343 this = self.expression(exp.WithinGroup, this=this, expression=order) 6344 6345 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6346 # Some dialects choose to implement and some do not. 6347 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6348 6349 # There is some code above in _parse_lambda that handles 6350 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6351 6352 # The below changes handle 6353 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6354 6355 # Oracle allows both formats 6356 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6357 # and Snowflake chose to do the same for familiarity 6358 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6359 if isinstance(this, exp.AggFunc): 6360 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6361 6362 if ignore_respect and ignore_respect is not this: 6363 ignore_respect.replace(ignore_respect.this) 6364 this = self.expression(ignore_respect.__class__, this=this) 6365 6366 this = self._parse_respect_or_ignore_nulls(this) 6367 6368 # bigquery select from window x AS (partition by ...) 
6369 if alias: 6370 over = None 6371 self._match(TokenType.ALIAS) 6372 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6373 return this 6374 else: 6375 over = self._prev.text.upper() 6376 6377 if comments and isinstance(func, exp.Expression): 6378 func.pop_comments() 6379 6380 if not self._match(TokenType.L_PAREN): 6381 return self.expression( 6382 exp.Window, 6383 comments=comments, 6384 this=this, 6385 alias=self._parse_id_var(False), 6386 over=over, 6387 ) 6388 6389 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6390 6391 first = self._match(TokenType.FIRST) 6392 if self._match_text_seq("LAST"): 6393 first = False 6394 6395 partition, order = self._parse_partition_and_order() 6396 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6397 6398 if kind: 6399 self._match(TokenType.BETWEEN) 6400 start = self._parse_window_spec() 6401 self._match(TokenType.AND) 6402 end = self._parse_window_spec() 6403 6404 spec = self.expression( 6405 exp.WindowSpec, 6406 kind=kind, 6407 start=start["value"], 6408 start_side=start["side"], 6409 end=end["value"], 6410 end_side=end["side"], 6411 ) 6412 else: 6413 spec = None 6414 6415 self._match_r_paren() 6416 6417 window = self.expression( 6418 exp.Window, 6419 comments=comments, 6420 this=this, 6421 partition_by=partition, 6422 order=order, 6423 spec=spec, 6424 alias=window_alias, 6425 over=over, 6426 first=first, 6427 ) 6428 6429 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6430 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6431 return self._parse_window(window, alias=alias) 6432 6433 return window 6434 6435 def _parse_partition_and_order( 6436 self, 6437 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6438 return self._parse_partition_by(), self._parse_order() 6439 6440 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6441 self._match(TokenType.BETWEEN) 6442 6443 return { 6444 "value": ( 6445 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6446 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6447 or self._parse_bitwise() 6448 ), 6449 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6450 } 6451 6452 def _parse_alias( 6453 self, this: t.Optional[exp.Expression], explicit: bool = False 6454 ) -> t.Optional[exp.Expression]: 6455 any_token = self._match(TokenType.ALIAS) 6456 comments = self._prev_comments or [] 6457 6458 if explicit and not any_token: 6459 return this 6460 6461 if self._match(TokenType.L_PAREN): 6462 aliases = self.expression( 6463 exp.Aliases, 6464 comments=comments, 6465 this=this, 6466 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6467 ) 6468 self._match_r_paren(aliases) 6469 return aliases 6470 6471 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6472 self.STRING_ALIASES and self._parse_string_as_identifier() 6473 ) 6474 6475 if alias: 6476 comments.extend(alias.pop_comments()) 6477 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6478 column = this.this 6479 6480 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6481 if not this.comments and column and column.comments: 6482 this.comments = column.pop_comments() 6483 6484 return this 6485 6486 def _parse_id_var( 6487 self, 6488 any_token: bool = True, 6489 tokens: t.Optional[t.Collection[TokenType]] = None, 6490 ) -> t.Optional[exp.Expression]: 6491 expression = self._parse_identifier() 6492 if 
not expression and ( 6493 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6494 ): 6495 quoted = self._prev.token_type == TokenType.STRING 6496 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6497 6498 return expression 6499 6500 def _parse_string(self) -> t.Optional[exp.Expression]: 6501 if self._match_set(self.STRING_PARSERS): 6502 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6503 return self._parse_placeholder() 6504 6505 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6506 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6507 6508 def _parse_number(self) -> t.Optional[exp.Expression]: 6509 if self._match_set(self.NUMERIC_PARSERS): 6510 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6511 return self._parse_placeholder() 6512 6513 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6514 if self._match(TokenType.IDENTIFIER): 6515 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6516 return self._parse_placeholder() 6517 6518 def _parse_var( 6519 self, 6520 any_token: bool = False, 6521 tokens: t.Optional[t.Collection[TokenType]] = None, 6522 upper: bool = False, 6523 ) -> t.Optional[exp.Expression]: 6524 if ( 6525 (any_token and self._advance_any()) 6526 or self._match(TokenType.VAR) 6527 or (self._match_set(tokens) if tokens else False) 6528 ): 6529 return self.expression( 6530 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6531 ) 6532 return self._parse_placeholder() 6533 6534 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6535 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6536 self._advance() 6537 return self._prev 6538 return None 6539 6540 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6541 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6542 6543 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6544 return self._parse_primary() or self._parse_var(any_token=True) 6545 6546 def _parse_null(self) -> t.Optional[exp.Expression]: 6547 if self._match_set(self.NULL_TOKENS): 6548 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6549 return self._parse_placeholder() 6550 6551 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6552 if self._match(TokenType.TRUE): 6553 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6554 if self._match(TokenType.FALSE): 6555 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6556 return self._parse_placeholder() 6557 6558 def _parse_star(self) -> t.Optional[exp.Expression]: 6559 if self._match(TokenType.STAR): 6560 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6561 return self._parse_placeholder() 6562 6563 def _parse_parameter(self) -> exp.Parameter: 6564 this = self._parse_identifier() or self._parse_primary_or_var() 6565 return self.expression(exp.Parameter, this=this) 6566 6567 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6568 if self._match_set(self.PLACEHOLDER_PARSERS): 6569 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6570 if placeholder: 6571 return placeholder 6572 self._advance(-1) 6573 return None 6574 6575 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6576 if not self._match_texts(keywords): 6577 return None 6578 if self._match(TokenType.L_PAREN, 
advance=False): 6579 return self._parse_wrapped_csv(self._parse_expression) 6580 6581 expression = self._parse_expression() 6582 return [expression] if expression else None 6583 6584 def _parse_csv( 6585 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6586 ) -> t.List[exp.Expression]: 6587 parse_result = parse_method() 6588 items = [parse_result] if parse_result is not None else [] 6589 6590 while self._match(sep): 6591 self._add_comments(parse_result) 6592 parse_result = parse_method() 6593 if parse_result is not None: 6594 items.append(parse_result) 6595 6596 return items 6597 6598 def _parse_tokens( 6599 self, parse_method: t.Callable, expressions: t.Dict 6600 ) -> t.Optional[exp.Expression]: 6601 this = parse_method() 6602 6603 while self._match_set(expressions): 6604 this = self.expression( 6605 expressions[self._prev.token_type], 6606 this=this, 6607 comments=self._prev_comments, 6608 expression=parse_method(), 6609 ) 6610 6611 return this 6612 6613 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6614 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6615 6616 def _parse_wrapped_csv( 6617 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6618 ) -> t.List[exp.Expression]: 6619 return self._parse_wrapped( 6620 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6621 ) 6622 6623 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6624 wrapped = self._match(TokenType.L_PAREN) 6625 if not wrapped and not optional: 6626 self.raise_error("Expecting (") 6627 parse_result = parse_method() 6628 if wrapped: 6629 self._match_r_paren() 6630 return parse_result 6631 6632 def _parse_expressions(self) -> t.List[exp.Expression]: 6633 return self._parse_csv(self._parse_expression) 6634 6635 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6636 return self._parse_select() or self._parse_set_operations( 6637 self._parse_expression() if alias else self._parse_assignment() 6638 ) 6639 6640 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6641 return self._parse_query_modifiers( 6642 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6643 ) 6644 6645 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6646 this = None 6647 if self._match_texts(self.TRANSACTION_KIND): 6648 this = self._prev.text 6649 6650 self._match_texts(("TRANSACTION", "WORK")) 6651 6652 modes = [] 6653 while True: 6654 mode = [] 6655 while self._match(TokenType.VAR): 6656 mode.append(self._prev.text) 6657 6658 if mode: 6659 modes.append(" ".join(mode)) 6660 if not self._match(TokenType.COMMA): 6661 break 6662 6663 return self.expression(exp.Transaction, this=this, modes=modes) 6664 6665 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6666 chain = None 6667 savepoint = None 6668 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6669 6670 self._match_texts(("TRANSACTION", "WORK")) 6671 6672 if self._match_text_seq("TO"): 6673 self._match_text_seq("SAVEPOINT") 6674 savepoint = self._parse_id_var() 6675 6676 if self._match(TokenType.AND): 6677 chain = not self._match_text_seq("NO") 6678 self._match_text_seq("CHAIN") 6679 6680 if is_rollback: 6681 return self.expression(exp.Rollback, savepoint=savepoint) 6682 6683 return self.expression(exp.Commit, chain=chain) 6684 6685 def _parse_refresh(self) -> exp.Refresh: 6686 self._match(TokenType.TABLE) 6687 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6688 6689 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6690 if not self._match_text_seq("ADD"): 6691 return None 6692 6693 self._match(TokenType.COLUMN) 6694 exists_column = self._parse_exists(not_=True) 6695 expression = self._parse_field_def() 6696 6697 if expression: 6698 expression.set("exists", exists_column) 6699 6700 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6701 if self._match_texts(("FIRST", "AFTER")): 6702 position = self._prev.text 6703 column_position = self.expression( 6704 exp.ColumnPosition, this=self._parse_column(), position=position 6705 ) 6706 expression.set("position", column_position) 6707 6708 return expression 6709 6710 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6711 drop = self._match(TokenType.DROP) and self._parse_drop() 6712 if drop and not isinstance(drop, exp.Command): 6713 drop.set("kind", drop.args.get("kind", "COLUMN")) 6714 return drop 6715 6716 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6717 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6718 return self.expression( 6719 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6720 ) 6721 6722 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6723 index = self._index - 1 6724 6725 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6726 return self._parse_csv( 6727 lambda: self.expression( 6728 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6729 ) 6730 ) 6731 6732 self._retreat(index) 6733 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6734 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6735 6736 if self._match_text_seq("ADD", "COLUMNS"): 6737 schema = self._parse_schema() 6738 if schema: 6739 return [schema] 6740 return [] 6741 6742 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6743 6744 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6745 if self._match_texts(self.ALTER_ALTER_PARSERS): 6746 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6747 6748 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6749 # keyword after ALTER we default to parsing this statement 6750 self._match(TokenType.COLUMN) 6751 column = self._parse_field(any_token=True) 6752 6753 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6754 return self.expression(exp.AlterColumn, this=column, drop=True) 6755 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6756 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6757 if self._match(TokenType.COMMENT): 6758 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6759 if self._match_text_seq("DROP", "NOT", "NULL"): 6760 return self.expression( 6761 exp.AlterColumn, 6762 this=column, 6763 drop=True, 6764 allow_null=True, 6765 ) 6766 if self._match_text_seq("SET", "NOT", "NULL"): 6767 return self.expression( 6768 exp.AlterColumn, 6769 this=column, 6770 allow_null=False, 6771 ) 6772 self._match_text_seq("SET", "DATA") 6773 self._match_text_seq("TYPE") 6774 return self.expression( 6775 exp.AlterColumn, 6776 this=column, 6777 dtype=self._parse_types(), 6778 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6779 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6780 ) 6781 6782 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6783 if self._match_texts(("ALL", "EVEN", "AUTO")): 6784 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6785 6786 self._match_text_seq("KEY", "DISTKEY") 6787 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6788 6789 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6790 if compound: 6791 self._match_text_seq("SORTKEY") 6792 6793 if self._match(TokenType.L_PAREN, advance=False): 6794 return self.expression( 6795 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6796 ) 6797 6798 self._match_texts(("AUTO", "NONE")) 6799 return self.expression( 6800 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6801 ) 6802 6803 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6804 index = self._index - 1 6805 6806 partition_exists = self._parse_exists() 6807 if self._match(TokenType.PARTITION, advance=False): 6808 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6809 6810 self._retreat(index) 6811 return self._parse_csv(self._parse_drop_column) 6812 6813 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6814 if self._match(TokenType.COLUMN): 6815 exists = self._parse_exists() 6816 old_column = self._parse_column() 6817 to = self._match_text_seq("TO") 6818 new_column = self._parse_column() 6819 6820 if old_column is None or to is None or new_column is None: 6821 return None 6822 6823 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6824 6825 self._match_text_seq("TO") 6826 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6827 6828 def _parse_alter_table_set(self) -> exp.AlterSet: 6829 alter_set = self.expression(exp.AlterSet) 6830 6831 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6832 "TABLE", "PROPERTIES" 6833 ): 6834 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6835 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6836 alter_set.set("expressions", [self._parse_assignment()]) 6837 elif self._match_texts(("LOGGED", "UNLOGGED")): 6838 alter_set.set("option", exp.var(self._prev.text.upper())) 6839 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6840 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6841 elif self._match_text_seq("LOCATION"): 6842 alter_set.set("location", self._parse_field()) 6843 elif self._match_text_seq("ACCESS", "METHOD"): 6844 alter_set.set("access_method", self._parse_field()) 6845 elif self._match_text_seq("TABLESPACE"): 6846 alter_set.set("tablespace", self._parse_field()) 6847 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6848 alter_set.set("file_format", [self._parse_field()]) 6849 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6850 alter_set.set("file_format", self._parse_wrapped_options()) 6851 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6852 alter_set.set("copy_options", self._parse_wrapped_options()) 6853 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6854 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6855 else: 6856 if self._match_text_seq("SERDE"): 6857 alter_set.set("serde", self._parse_field()) 6858 6859 alter_set.set("expressions", [self._parse_properties()]) 6860 6861 return 
alter_set 6862 6863 def _parse_alter(self) -> exp.Alter | exp.Command: 6864 start = self._prev 6865 6866 alter_token = self._match_set(self.ALTERABLES) and self._prev 6867 if not alter_token: 6868 return self._parse_as_command(start) 6869 6870 exists = self._parse_exists() 6871 only = self._match_text_seq("ONLY") 6872 this = self._parse_table(schema=True) 6873 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6874 6875 if self._next: 6876 self._advance() 6877 6878 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6879 if parser: 6880 actions = ensure_list(parser(self)) 6881 not_valid = self._match_text_seq("NOT", "VALID") 6882 options = self._parse_csv(self._parse_property) 6883 6884 if not self._curr and actions: 6885 return self.expression( 6886 exp.Alter, 6887 this=this, 6888 kind=alter_token.text.upper(), 6889 exists=exists, 6890 actions=actions, 6891 only=only, 6892 options=options, 6893 cluster=cluster, 6894 not_valid=not_valid, 6895 ) 6896 6897 return self._parse_as_command(start) 6898 6899 def _parse_merge(self) -> exp.Merge: 6900 self._match(TokenType.INTO) 6901 target = self._parse_table() 6902 6903 if target and self._match(TokenType.ALIAS, advance=False): 6904 target.set("alias", self._parse_table_alias()) 6905 6906 self._match(TokenType.USING) 6907 using = self._parse_table() 6908 6909 self._match(TokenType.ON) 6910 on = self._parse_assignment() 6911 6912 return self.expression( 6913 exp.Merge, 6914 this=target, 6915 using=using, 6916 on=on, 6917 expressions=self._parse_when_matched(), 6918 returning=self._parse_returning(), 6919 ) 6920 6921 def _parse_when_matched(self) -> t.List[exp.When]: 6922 whens = [] 6923 6924 while self._match(TokenType.WHEN): 6925 matched = not self._match(TokenType.NOT) 6926 self._match_text_seq("MATCHED") 6927 source = ( 6928 False 6929 if self._match_text_seq("BY", "TARGET") 6930 else self._match_text_seq("BY", "SOURCE") 6931 ) 6932 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6933 6934 self._match(TokenType.THEN) 6935 6936 if self._match(TokenType.INSERT): 6937 this = self._parse_star() 6938 if this: 6939 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6940 else: 6941 then = self.expression( 6942 exp.Insert, 6943 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6944 expression=self._match_text_seq("VALUES") and self._parse_value(), 6945 ) 6946 elif self._match(TokenType.UPDATE): 6947 expressions = self._parse_star() 6948 if expressions: 6949 then = self.expression(exp.Update, expressions=expressions) 6950 else: 6951 then = self.expression( 6952 exp.Update, 6953 expressions=self._match(TokenType.SET) 6954 and self._parse_csv(self._parse_equality), 6955 ) 6956 elif self._match(TokenType.DELETE): 6957 then = self.expression(exp.Var, this=self._prev.text) 6958 else: 6959 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6960 6961 whens.append( 6962 self.expression( 6963 exp.When, 6964 matched=matched, 6965 source=source, 6966 condition=condition, 6967 then=then, 6968 ) 6969 ) 6970 return whens 6971 6972 def _parse_show(self) -> t.Optional[exp.Expression]: 6973 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6974 if parser: 6975 return parser(self) 6976 return self._parse_as_command(self._prev) 6977 6978 def _parse_set_item_assignment( 6979 self, kind: t.Optional[str] = None 6980 ) -> t.Optional[exp.Expression]: 6981 index = self._index 6982 6983 if kind in ("GLOBAL", "SESSION") and 
self._match_text_seq("TRANSACTION"): 6984 return self._parse_set_transaction(global_=kind == "GLOBAL") 6985 6986 left = self._parse_primary() or self._parse_column() 6987 assignment_delimiter = self._match_texts(("=", "TO")) 6988 6989 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6990 self._retreat(index) 6991 return None 6992 6993 right = self._parse_statement() or self._parse_id_var() 6994 if isinstance(right, (exp.Column, exp.Identifier)): 6995 right = exp.var(right.name) 6996 6997 this = self.expression(exp.EQ, this=left, expression=right) 6998 return self.expression(exp.SetItem, this=this, kind=kind) 6999 7000 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7001 self._match_text_seq("TRANSACTION") 7002 characteristics = self._parse_csv( 7003 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7004 ) 7005 return self.expression( 7006 exp.SetItem, 7007 expressions=characteristics, 7008 kind="TRANSACTION", 7009 **{"global": global_}, # type: ignore 7010 ) 7011 7012 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7013 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7014 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7015 7016 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7017 index = self._index 7018 set_ = self.expression( 7019 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7020 ) 7021 7022 if self._curr: 7023 self._retreat(index) 7024 return self._parse_as_command(self._prev) 7025 7026 return set_ 7027 7028 def _parse_var_from_options( 7029 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7030 ) -> t.Optional[exp.Var]: 7031 start = self._curr 7032 if not start: 7033 return None 7034 7035 option = start.text.upper() 7036 continuations = options.get(option) 7037 7038 index = self._index 7039 self._advance() 7040 for keywords in continuations or []: 7041 if isinstance(keywords, str): 7042 keywords = (keywords,) 7043 7044 if self._match_text_seq(*keywords): 7045 option = f"{option} {' '.join(keywords)}" 7046 break 7047 else: 7048 if continuations or continuations is None: 7049 if raise_unmatched: 7050 self.raise_error(f"Unknown option {option}") 7051 7052 self._retreat(index) 7053 return None 7054 7055 return exp.var(option) 7056 7057 def _parse_as_command(self, start: Token) -> exp.Command: 7058 while self._curr: 7059 self._advance() 7060 text = self._find_sql(start, self._prev) 7061 size = len(start.text) 7062 self._warn_unsupported() 7063 return exp.Command(this=text[:size], expression=text[size:]) 7064 7065 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7066 settings = [] 7067 7068 self._match_l_paren() 7069 kind = self._parse_id_var() 7070 7071 if self._match(TokenType.L_PAREN): 7072 while True: 7073 key = self._parse_id_var() 7074 value = self._parse_primary() 7075 7076 if not key and value is None: 7077 break 7078 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7079 self._match(TokenType.R_PAREN) 7080 7081 self._match_r_paren() 7082 7083 return self.expression( 7084 exp.DictProperty, 7085 this=this, 7086 kind=kind.this if kind else None, 7087 settings=settings, 7088 ) 7089 7090 def _parse_dict_range(self, this: str) -> exp.DictRange: 7091 self._match_l_paren() 7092 has_min = self._match_text_seq("MIN") 7093 if has_min: 7094 min = self._parse_var() or self._parse_primary() 7095 self._match_text_seq("MAX") 7096 max = 
self._parse_var() or self._parse_primary() 7097 else: 7098 max = self._parse_var() or self._parse_primary() 7099 min = exp.Literal.number(0) 7100 self._match_r_paren() 7101 return self.expression(exp.DictRange, this=this, min=min, max=max) 7102 7103 def _parse_comprehension( 7104 self, this: t.Optional[exp.Expression] 7105 ) -> t.Optional[exp.Comprehension]: 7106 index = self._index 7107 expression = self._parse_column() 7108 if not self._match(TokenType.IN): 7109 self._retreat(index - 1) 7110 return None 7111 iterator = self._parse_column() 7112 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7113 return self.expression( 7114 exp.Comprehension, 7115 this=this, 7116 expression=expression, 7117 iterator=iterator, 7118 condition=condition, 7119 ) 7120 7121 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7122 if self._match(TokenType.HEREDOC_STRING): 7123 return self.expression(exp.Heredoc, this=self._prev.text) 7124 7125 if not self._match_text_seq("$"): 7126 return None 7127 7128 tags = ["$"] 7129 tag_text = None 7130 7131 if self._is_connected(): 7132 self._advance() 7133 tags.append(self._prev.text.upper()) 7134 else: 7135 self.raise_error("No closing $ found") 7136 7137 if tags[-1] != "$": 7138 if self._is_connected() and self._match_text_seq("$"): 7139 tag_text = tags[-1] 7140 tags.append("$") 7141 else: 7142 self.raise_error("No closing $ found") 7143 7144 heredoc_start = self._curr 7145 7146 while self._curr: 7147 if self._match_text_seq(*tags, advance=False): 7148 this = self._find_sql(heredoc_start, self._prev) 7149 self._advance(len(tags)) 7150 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7151 7152 self._advance() 7153 7154 self.raise_error(f"No closing {''.join(tags)} found") 7155 return None 7156 7157 def _find_parser( 7158 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7159 ) -> t.Optional[t.Callable]: 7160 if not self._curr: 7161 return None 7162 7163 index = self._index 7164 this = [] 7165 while True: 7166 # The current token might be multiple words 7167 curr = self._curr.text.upper() 7168 key = curr.split(" ") 7169 this.append(curr) 7170 7171 self._advance() 7172 result, trie = in_trie(trie, key) 7173 if result == TrieResult.FAILED: 7174 break 7175 7176 if result == TrieResult.EXISTS: 7177 subparser = parsers[" ".join(this)] 7178 return subparser 7179 7180 self._retreat(index) 7181 return None 7182 7183 def _match(self, token_type, advance=True, expression=None): 7184 if not self._curr: 7185 return None 7186 7187 if self._curr.token_type == token_type: 7188 if advance: 7189 self._advance() 7190 self._add_comments(expression) 7191 return True 7192 7193 return None 7194 7195 def _match_set(self, types, advance=True): 7196 if not self._curr: 7197 return None 7198 7199 if self._curr.token_type in types: 7200 if advance: 7201 self._advance() 7202 return True 7203 7204 return None 7205 7206 def _match_pair(self, token_type_a, token_type_b, advance=True): 7207 if not self._curr or not self._next: 7208 return None 7209 7210 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7211 if advance: 7212 self._advance(2) 7213 return True 7214 7215 return None 7216 7217 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7218 if not self._match(TokenType.L_PAREN, expression=expression): 7219 self.raise_error("Expecting (") 7220 7221 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7222 if not self._match(TokenType.R_PAREN, expression=expression): 
7223 self.raise_error("Expecting )") 7224 7225 def _match_texts(self, texts, advance=True): 7226 if ( 7227 self._curr 7228 and self._curr.token_type != TokenType.STRING 7229 and self._curr.text.upper() in texts 7230 ): 7231 if advance: 7232 self._advance() 7233 return True 7234 return None 7235 7236 def _match_text_seq(self, *texts, advance=True): 7237 index = self._index 7238 for text in texts: 7239 if ( 7240 self._curr 7241 and self._curr.token_type != TokenType.STRING 7242 and self._curr.text.upper() == text 7243 ): 7244 self._advance() 7245 else: 7246 self._retreat(index) 7247 return None 7248 7249 if not advance: 7250 self._retreat(index) 7251 7252 return True 7253 7254 def _replace_lambda( 7255 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7256 ) -> t.Optional[exp.Expression]: 7257 if not node: 7258 return node 7259 7260 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7261 7262 for column in node.find_all(exp.Column): 7263 typ = lambda_types.get(column.parts[0].name) 7264 if typ is not None: 7265 dot_or_id = column.to_dot() if column.table else column.this 7266 7267 if typ: 7268 dot_or_id = self.expression( 7269 exp.Cast, 7270 this=dot_or_id, 7271 to=typ, 7272 ) 7273 7274 parent = column.parent 7275 7276 while isinstance(parent, exp.Dot): 7277 if not isinstance(parent.parent, exp.Dot): 7278 parent.replace(dot_or_id) 7279 break 7280 parent = parent.parent 7281 else: 7282 if column is node: 7283 node = dot_or_id 7284 else: 7285 column.replace(dot_or_id) 7286 return node 7287 7288 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7289 start = self._prev 7290 7291 # Not to be confused with TRUNCATE(number, decimals) function call 7292 if self._match(TokenType.L_PAREN): 7293 self._retreat(self._index - 2) 7294 return self._parse_function() 7295 7296 # Clickhouse supports TRUNCATE DATABASE as well 7297 is_database = self._match(TokenType.DATABASE) 7298 7299 self._match(TokenType.TABLE) 7300 7301 exists = self._parse_exists(not_=False) 7302 7303 expressions = self._parse_csv( 7304 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7305 ) 7306 7307 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7308 7309 if self._match_text_seq("RESTART", "IDENTITY"): 7310 identity = "RESTART" 7311 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7312 identity = "CONTINUE" 7313 else: 7314 identity = None 7315 7316 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7317 option = self._prev.text 7318 else: 7319 option = None 7320 7321 partition = self._parse_partition() 7322 7323 # Fallback case 7324 if self._curr: 7325 return self._parse_as_command(start) 7326 7327 return self.expression( 7328 exp.TruncateTable, 7329 expressions=expressions, 7330 is_database=is_database, 7331 exists=exists, 7332 cluster=cluster, 7333 identity=identity, 7334 option=option, 7335 partition=partition, 7336 ) 7337 7338 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7339 this = self._parse_ordered(self._parse_opclass) 7340 7341 if not self._match(TokenType.WITH): 7342 return this 7343 7344 op = self._parse_var(any_token=True) 7345 7346 return self.expression(exp.WithOperator, this=this, op=op) 7347 7348 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7349 self._match(TokenType.EQ) 7350 self._match(TokenType.L_PAREN) 7351 7352 opts: t.List[t.Optional[exp.Expression]] = [] 7353 while self._curr and not self._match(TokenType.R_PAREN): 7354 if 
self._match_text_seq("FORMAT_NAME", "="): 7355 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7356 # so we parse it separately to use _parse_field() 7357 prop = self.expression( 7358 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7359 ) 7360 opts.append(prop) 7361 else: 7362 opts.append(self._parse_property()) 7363 7364 self._match(TokenType.COMMA) 7365 7366 return opts 7367 7368 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7369 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7370 7371 options = [] 7372 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7373 option = self._parse_var(any_token=True) 7374 prev = self._prev.text.upper() 7375 7376 # Different dialects might separate options and values by white space, "=" and "AS" 7377 self._match(TokenType.EQ) 7378 self._match(TokenType.ALIAS) 7379 7380 param = self.expression(exp.CopyParameter, this=option) 7381 7382 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7383 TokenType.L_PAREN, advance=False 7384 ): 7385 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7386 param.set("expressions", self._parse_wrapped_options()) 7387 elif prev == "FILE_FORMAT": 7388 # T-SQL's external file format case 7389 param.set("expression", self._parse_field()) 7390 else: 7391 param.set("expression", self._parse_unquoted_field()) 7392 7393 options.append(param) 7394 self._match(sep) 7395 7396 return options 7397 7398 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7399 expr = self.expression(exp.Credentials) 7400 7401 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7402 expr.set("storage", self._parse_field()) 7403 if self._match_text_seq("CREDENTIALS"): 7404 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7405 creds = ( 7406 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7407 ) 7408 expr.set("credentials", creds) 7409 if self._match_text_seq("ENCRYPTION"): 7410 expr.set("encryption", self._parse_wrapped_options()) 7411 if self._match_text_seq("IAM_ROLE"): 7412 expr.set("iam_role", self._parse_field()) 7413 if self._match_text_seq("REGION"): 7414 expr.set("region", self._parse_field()) 7415 7416 return expr 7417 7418 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7419 return self._parse_field() 7420 7421 def _parse_copy(self) -> exp.Copy | exp.Command: 7422 start = self._prev 7423 7424 self._match(TokenType.INTO) 7425 7426 this = ( 7427 self._parse_select(nested=True, parse_subquery_alias=False) 7428 if self._match(TokenType.L_PAREN, advance=False) 7429 else self._parse_table(schema=True) 7430 ) 7431 7432 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7433 7434 files = self._parse_csv(self._parse_file_location) 7435 credentials = self._parse_credentials() 7436 7437 self._match_text_seq("WITH") 7438 7439 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7440 7441 # Fallback case 7442 if self._curr: 7443 return self._parse_as_command(start) 7444 7445 return self.expression( 7446 exp.Copy, 7447 this=this, 7448 kind=kind, 7449 credentials=credentials, 7450 files=files, 7451 params=params, 7452 ) 7453 7454 def _parse_normalize(self) -> exp.Normalize: 7455 return self.expression( 7456 exp.Normalize, 7457 this=self._parse_bitwise(), 7458 form=self._match(TokenType.COMMA) and self._parse_var(), 7459 ) 7460 7461 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7462 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7463 this = self._parse_function() 7464 if isinstance(this, exp.Columns): 7465 this.set("unpack", True) 7466 return this 7467 7468 return self.expression( 7469 exp.Star, 7470 **{ # type: ignore 7471 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7472 "replace": self._parse_star_op("REPLACE"), 7473 "rename": self._parse_star_op("RENAME"), 7474 }, 7475 ) 7476 7477 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7478 privilege_parts = [] 7479 7480 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7481 # (end of privilege list) or L_PAREN (start of column list) are met 7482 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7483 privilege_parts.append(self._curr.text.upper()) 7484 self._advance() 7485 7486 this = exp.var(" ".join(privilege_parts)) 7487 expressions = ( 7488 self._parse_wrapped_csv(self._parse_column) 7489 if self._match(TokenType.L_PAREN, advance=False) 7490 else None 7491 ) 7492 7493 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7494 7495 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7496 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7497 principal = self._parse_id_var() 7498 7499 if not principal: 7500 return None 7501 7502 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7503 7504 def _parse_grant(self) -> exp.Grant | exp.Command: 7505 start = self._prev 7506 7507 privileges = self._parse_csv(self._parse_grant_privilege) 7508 7509 self._match(TokenType.ON) 7510 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7511 7512 # Attempt to parse the securable e.g. MySQL allows names 7513 # such as "foo.*", "*.*" which are not easily parseable yet 7514 securable = self._try_parse(self._parse_table_parts) 7515 7516 if not securable or not self._match_text_seq("TO"): 7517 return self._parse_as_command(start) 7518 7519 principals = self._parse_csv(self._parse_grant_principal) 7520 7521 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7522 7523 if self._curr: 7524 return self._parse_as_command(start) 7525 7526 return self.expression( 7527 exp.Grant, 7528 privileges=privileges, 7529 kind=kind, 7530 securable=securable, 7531 principals=principals, 7532 grant_option=grant_option, 7533 ) 7534 7535 def _parse_overlay(self) -> exp.Overlay: 7536 return self.expression( 7537 exp.Overlay, 7538 **{ # type: ignore 7539 "this": self._parse_bitwise(), 7540 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7541 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7542 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7543 }, 7544 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
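For example, a parser that records several problems and reports them together, rather than failing on the first one, might be configured as follows. This is a minimal sketch; the "duckdb" dialect name is only an illustration, and any supported dialect (or None for the base dialect) works:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect up to five errors and raise them together at the end of parsing,
    # instead of failing on the first one (ErrorLevel.IMMEDIATE, the default).
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")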
1378 def __init__( 1379 self, 1380 error_level: t.Optional[ErrorLevel] = None, 1381 error_message_context: int = 100, 1382 max_errors: int = 3, 1383 dialect: DialectType = None, 1384 ): 1385 from sqlglot.dialects import Dialect 1386 1387 self.error_level = error_level or ErrorLevel.IMMEDIATE 1388 self.error_message_context = error_message_context 1389 self.max_errors = max_errors 1390 self.dialect = Dialect.get_or_raise(dialect) 1391 self.reset()
1403 def parse( 1404 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1405 ) -> t.List[t.Optional[exp.Expression]]: 1406 """ 1407 Parses a list of tokens and returns a list of syntax trees, one tree 1408 per parsed SQL statement. 1409 1410 Args: 1411 raw_tokens: The list of tokens. 1412 sql: The original SQL string, used to produce helpful debug messages. 1413 1414 Returns: 1415 The list of the produced syntax trees. 1416 """ 1417 return self._parse( 1418 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1419 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of produced syntax trees.
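A minimal usage sketch, assuming the base Tokenizer from sqlglot.tokens and a two-statement input:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    tokens = Tokenizer().tokenize(sql)

    # One syntax tree per statement; passing the raw SQL makes error messages more helpful.
    trees = Parser().parse(tokens, sql)
    assert len(trees) == 2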
1421 def parse_into( 1422 self, 1423 expression_types: exp.IntoType, 1424 raw_tokens: t.List[Token], 1425 sql: t.Optional[str] = None, 1426 ) -> t.List[t.Optional[exp.Expression]]: 1427 """ 1428 Parses a list of tokens into a given Expression type. If a collection of Expression 1429 types is given instead, this method will try to parse the token list into each one 1430 of them, stopping at the first for which the parsing succeeds. 1431 1432 Args: 1433 expression_types: The expression type(s) to try and parse the token list into. 1434 raw_tokens: The list of tokens. 1435 sql: The original SQL string, used to produce helpful debug messages. 1436 1437 Returns: 1438 The target Expression. 1439 """ 1440 errors = [] 1441 for expression_type in ensure_list(expression_types): 1442 parser = self.EXPRESSION_PARSERS.get(expression_type) 1443 if not parser: 1444 raise TypeError(f"No parser registered for {expression_type}") 1445 1446 try: 1447 return self._parse(parser, raw_tokens, sql) 1448 except ParseError as e: 1449 e.errors[0]["into_expression"] = expression_type 1450 errors.append(e) 1451 1452 raise ParseError( 1453 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1454 errors=merge_errors(errors), 1455 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
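A sketch of parsing directly into a Select node; exp.Select is one of the types registered in EXPRESSION_PARSERS for the base parser:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1"
    select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]
    assert isinstance(select, exp.Select)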
1495 def check_errors(self) -> None: 1496 """Logs or raises any found errors, depending on the chosen error level setting.""" 1497 if self.error_level == ErrorLevel.WARN: 1498 for error in self.errors: 1499 logger.error(str(error)) 1500 elif self.error_level == ErrorLevel.RAISE and self.errors: 1501 raise ParseError( 1502 concat_messages(self.errors, self.max_errors), 1503 errors=merge_errors(self.errors), 1504 )
Logs or raises any found errors, depending on the chosen error level setting.
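Under ErrorLevel.RAISE, errors are accumulated while parsing, and check_errors (which parse calls internally) raises them in aggregate. A sketch, assuming the unbalanced parenthesis below does trip the parser:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM (SELECT 1"  # unbalanced parenthesis
    parser = Parser(error_level=ErrorLevel.RAISE)
    try:
        parser.parse(Tokenizer().tokenize(sql), sql)
    except ParseError as e:
        print(e.errors[0]["description"])  # e.g. "Expecting )"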
1506 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1507 """ 1508 Appends an error in the list of recorded errors or raises it, depending on the chosen 1509 error level setting. 1510 """ 1511 token = token or self._curr or self._prev or Token.string("") 1512 start = token.start 1513 end = token.end + 1 1514 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1515 highlight = self.sql[start:end] 1516 end_context = self.sql[end : end + self.error_message_context] 1517 1518 error = ParseError.new( 1519 f"{message}. Line {token.line}, Col: {token.col}.\n" 1520 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1521 description=message, 1522 line=token.line, 1523 col=token.col, 1524 start_context=start_context, 1525 highlight=highlight, 1526 end_context=end_context, 1527 ) 1528 1529 if self.error_level == ErrorLevel.IMMEDIATE: 1530 raise error 1531 1532 self.errors.append(error)
Appends an error to the list of recorded errors, or raises it immediately, depending on the chosen error level setting.
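Dialect-specific parsers typically call this from their own parsing methods. A hypothetical sketch (the FOO keyword and _parse_foo are invented for illustration):

    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_foo(self):
            # Either appends to self.errors or raises immediately,
            # depending on self.error_level.
            if not self._match_text_seq("FOO"):
                self.raise_error("Expecting FOO")
            return self._parse_id_var()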
1534 def expression( 1535 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1536 ) -> E: 1537 """ 1538 Creates a new, validated Expression. 1539 1540 Args: 1541 exp_class: The expression class to instantiate. 1542 comments: An optional list of comments to attach to the expression. 1543 kwargs: The arguments to set for the expression along with their respective values. 1544 1545 Returns: 1546 The target expression. 1547 """ 1548 instance = exp_class(**kwargs) 1549 instance.add_comments(comments) if comments else self._add_comments(instance) 1550 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
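A sketch of building a node directly through this method, using expression constructors that appear elsewhere in this module:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    eq = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
    assert eq.sql() == "a = 1"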
1557 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1558 """ 1559 Validates an Expression, making sure that all its mandatory arguments are set. 1560 1561 Args: 1562 expression: The expression to validate. 1563 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1564 1565 Returns: 1566 The validated expression. 1567 """ 1568 if self.error_level != ErrorLevel.IGNORE: 1569 for error_message in expression.error_messages(args): 1570 self.raise_error(error_message) 1571 1572 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
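A sketch, assuming exp.Between (whose low and high arguments are mandatory) as the incomplete expression:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.RAISE)
    incomplete = exp.Between(this=exp.column("x"))  # 'low' and 'high' are missing
    parser.validate_expression(incomplete)

    # The missing required arguments were recorded; this raises a ParseError.
    parser.check_errors()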