sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTimestamp.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_SET_OP = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
class BigQuery(sqlglot.dialects.dialect.Dialect):
WEEK_OFFSET = -1
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday); -1 would be Sunday.
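For illustration, the setting is a plain class attribute that week-related date logic consults:

from sqlglot.dialects import BigQuery

# BigQuery weeks start on Sunday, one day before the Monday default.
print(BigQuery.WEEK_OFFSET)  # -1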
LOG_BASE_FIRST = False
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
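A small sketch of the effect: BigQuery's LOG(x, base) takes the value first, while e.g. Hive's LOG(base, x) takes the base first, so the arguments should be swapped in transit (the output shown is indicative):

import sqlglot

print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="hive")[0])
# e.g. SELECT LOG(10, 100)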
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
Specifies the strategy according to which identifiers should be normalized.
NORMALIZE_FUNCTIONS = False
Determines how function names are going to be normalized. Possible values:
- "upper" or True: convert names to uppercase.
- "lower": convert names to lowercase.
- False: disables function name normalization.
TIME_MAPPING = {"%D": "%m/%d/%y", "%E6S": "%S.%f"}
Associates this dialect's time formats with their equivalent Python strftime formats.
FORMAT_MAPPING = {"DD": "%d", "MM": "%m", "MON": "%b", "MONTH": "%B", "YYYY": "%Y", "YY": "%y", "HH": "%I", "HH12": "%I", "HH24": "%H", "MI": "%M", "SS": "%S", "SSSSS": "%f", "TZH": "%z"}
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
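The strftime equivalents in TIME_MAPPING are what let format strings travel between dialects; a sketch (output may differ slightly by version):

import sqlglot

print(
    sqlglot.transpile(
        "SELECT PARSE_TIMESTAMP('%d/%m/%y %H:%M:%S', ts)",
        read="bigquery",
        write="duckdb",
    )[0]
)
# e.g. SELECT STRPTIME(ts, '%d/%m/%y %H:%M:%S')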
def normalize_identifier(self, expression: E) -> E:
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it would need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive on Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, as in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
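A small illustrative sketch of this method on the BigQuery dialect (expected results in comments; exp.to_table is used here only to give the identifier a Table parent):

from sqlglot import exp
from sqlglot.dialects.bigquery import BigQuery

dialect = BigQuery()

# A bare, unquoted identifier is case-insensitive in BigQuery, so it is lowercased
print(dialect.normalize_identifier(exp.to_identifier("FoO")).sql(dialect="bigquery"))  # foo

# The name of a qualified table is case-sensitive by default, so it is left untouched
ident = exp.to_table("Dataset.Tbl").this
print(dialect.normalize_identifier(ident).sql(dialect="bigquery"))  # Tbl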
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- DATE_PART_MAPPING
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    QUOTES = ["'", '"', '"""', "'''"]
    COMMENTS = ["--", "#", ("/*", "*/")]
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["\\"]

    HEX_STRINGS = [("0x", ""), ("0X", "")]

    BYTE_STRINGS = [
        (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
    ]

    RAW_STRINGS = [
        (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
    ]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "ANY TYPE": TokenType.VARIANT,
        "BEGIN": TokenType.COMMAND,
        "BEGIN TRANSACTION": TokenType.BEGIN,
        "BYTES": TokenType.BINARY,
        "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
        "DATETIME": TokenType.TIMESTAMP,
        "DECLARE": TokenType.COMMAND,
        "ELSEIF": TokenType.COMMAND,
        "EXCEPTION": TokenType.COMMAND,
        "FLOAT64": TokenType.DOUBLE,
        "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
        "MODEL": TokenType.MODEL,
        "NOT DETERMINISTIC": TokenType.VOLATILE,
        "RECORD": TokenType.STRUCT,
        "TIMESTAMP": TokenType.TIMESTAMPTZ,
    }
    KEYWORDS.pop("DIV")
    KEYWORDS.pop("VALUES")
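For example, a minimal sketch assuming the top-level sqlglot.tokenize helper; raw and triple-quoted strings are recognized as single string tokens:

import sqlglot

for token in sqlglot.tokenize("SELECT r'\\d+', '''multi line'''", read="bigquery"):
    print(token.token_type, token.text)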
Inherited Members
class Parser(parser.Parser):
    PREFIXED_PIVOT_COLUMNS = True
    LOG_DEFAULTS_TO_LN = True
    SUPPORTS_IMPLICIT_UNNEST = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _build_date,
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME": _build_datetime,
        "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _build_to_hex,
        "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _build_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _build_time,
        "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _build_timestamp,
        "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                text = ""
                while self._curr and self._curr.token_type != TokenType.DOT:
                    self._advance()
                    text += self._prev.text
                table_name += text

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        table = super()._parse_table_parts(
            schema=schema, is_db_reference=is_db_reference, wildcard=True
        )

        # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
        if not table.catalog:
            if table.db:
                parts = table.db.split(".")
                if len(parts) == 2 and not table.args["db"].quoted:
                    table.set("catalog", exp.Identifier(this=parts[0]))
                    table.set("db", exp.Identifier(this=parts[1]))
            else:
                parts = table.name.split(".")
                if len(parts) == 2 and not table.this.quoted:
                    table.set("db", exp.Identifier(this=parts[0]))
                    table.set("this", exp.Identifier(this=parts[1]))

        if any("." in p.name for p in table.parts):
            catalog, db, this, *rest = (
                exp.to_identifier(p, quoted=True)
                for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
            )

            if rest and this:
                this = exp.Dot.build([this, *rest])  # type: ignore

            table = exp.Table(
                this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
            )
            table.meta["quoted_table"] = True

        return table

    def _parse_column(self) -> t.Optional[exp.Expression]:
        column = super()._parse_column()
        if isinstance(column, exp.Column):
            parts = column.parts
            if any("." in p.name for p in parts):
                catalog, db, table, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                column.meta["quoted_column"] = True

        return column

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket
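As an illustrative sketch of the table- and bracket-parsing logic above (expected results in comments; exact representations may vary by version):

import sqlglot
from sqlglot import exp

# Hyphenated project names such as my-project are collected into a single identifier
table = sqlglot.parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery").find(exp.Table)
print(table.catalog, table.db, table.name)  # my-project dataset tbl

# OFFSET/ORDINAL/SAFE_OFFSET subscripts are folded into exp.Bracket's offset/safe args
bracket = sqlglot.parse_one("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery").find(exp.Bracket)
print(bracket.args.get("offset"), bracket.args.get("safe"))  # 0 True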
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
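For example, a minimal sketch of the error-level knob (the broken SQL is only illustrative):

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    sqlglot.parse_one("SELECT 1 +", read="bigquery", error_level=ErrorLevel.RAISE)
except ParseError as e:
    print(e)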
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_JSON_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_SET_OP = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    NAMED_PLACEHOLDER_TOKEN = "@"
    HEX_FUNC = "TO_HEX"
    WITH_PROPERTIES_PREFIX = "OPTIONS"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.Array: inline_array_unless_query,
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, unit_to_var(e)
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                transforms.unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimestampFromParts: rename_func("DATETIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.TsOrDsToTimestamp: rename_func("DATETIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.ROWVERSION: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        "all",
        "and",
        "any",
        "array",
        "as",
        "asc",
        "assert_rows_modified",
        "at",
        "between",
        "by",
        "case",
        "cast",
        "collate",
        "contains",
        "create",
        "cross",
        "cube",
        "current",
        "default",
        "define",
        "desc",
        "distinct",
        "else",
        "end",
        "enum",
        "escape",
        "except",
        "exclude",
        "exists",
        "extract",
        "false",
        "fetch",
        "following",
        "for",
        "from",
        "full",
        "group",
        "grouping",
        "groups",
        "hash",
        "having",
        "if",
        "ignore",
        "in",
        "inner",
        "intersect",
        "interval",
        "into",
        "is",
        "join",
        "lateral",
        "left",
        "like",
        "limit",
        "lookup",
        "merge",
        "natural",
        "new",
        "no",
        "not",
        "null",
        "nulls",
        "of",
        "on",
        "or",
        "order",
        "outer",
        "over",
        "partition",
        "preceding",
        "proto",
        "qualify",
        "range",
        "recursive",
        "respect",
        "right",
        "rollup",
        "rows",
        "select",
        "set",
        "some",
        "struct",
        "tablesample",
        "then",
        "to",
        "treat",
        "true",
        "unbounded",
        "union",
        "unnest",
        "using",
        "when",
        "where",
        "window",
        "with",
        "within",
    }

    def mod_sql(self, expression: exp.Mod) -> str:
        this = expression.this
        expr = expression.expression
        return self.func(
            "MOD",
            this.unnest() if isinstance(this, exp.Paren) else this,
            expr.unnest() if isinstance(expr, exp.Paren) else expr,
        )

    def column_parts(self, expression: exp.Column) -> str:
        if expression.meta.get("quoted_column"):
            # If a column reference is of the form `dataset.table`.name, we need
            # to preserve the quoted table path, otherwise the reference breaks
            table_parts = ".".join(p.name for p in expression.parts[:-1])
            table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
            return f"{table_path}.{self.sql(expression, 'this')}"

        return super().column_parts(expression)

    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
        return self.func("FORMAT_DATE", self.format_time(expression), this.this)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
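For example, TYPE_MAPPING and TRANSFORMS drive rewrites like the following (a small sketch; the expected output is shown as a comment):

import sqlglot

print(sqlglot.transpile("SELECT CAST(x AS VARCHAR), APPROX_DISTINCT(y) FROM t", write="bigquery")[0])
# Expected output: SELECT CAST(x AS STRING), APPROX_COUNT_DISTINCT(y) FROM t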
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): never quote, except in cases where it's mandatory by the dialect; True or 'always': always quote; 'safe': only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): convert names to uppercase; "lower": convert names to lowercase; False: disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
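For instance, a minimal sketch of the pretty option:

import sqlglot

print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", write="bigquery", pretty=True)[0])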
def column_parts(self, expression: exp.Column) -> str:
    if expression.meta.get("quoted_column"):
        # If a column reference is of the form `dataset.table`.name, we need
        # to preserve the quoted table path, otherwise the reference breaks
        table_parts = ".".join(p.name for p in expression.parts[:-1])
        table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
        return f"{table_path}.{self.sql(expression, 'this')}"

    return super().column_parts(expression)
def table_parts(self, expression: exp.Table) -> str:
    # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
    # we need to make sure the correct quoting is used in each case.
    #
    # For example, if there is a CTE x that clashes with a schema name, then the former will
    # return the table y in that schema, whereas the latter will return the CTE's y column:
    #
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
    if expression.meta.get("quoted_table"):
        table_parts = ".".join(p.name for p in expression.parts)
        return self.sql(exp.Identifier(this=table_parts, quoted=True))

    return super().table_parts(expression)
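A short sketch of the distinction (the expected round-trip output is shown as a comment):

import sqlglot

# A quoted path is kept as a single identifier, so it still names the same data source
print(sqlglot.transpile("SELECT * FROM `x.y`", read="bigquery", write="bigquery")[0])
# Expected output: SELECT * FROM `x.y`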
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    parent = expression.parent

    # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
    # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
    if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
        return self.func(
            "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
        )

    return super().attimezone_sql(expression)
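For example, AT TIME ZONE read from another dialect goes through this conversion (a hedged sketch; expected output in the comment):

import sqlglot

print(sqlglot.transpile("SELECT ts AT TIME ZONE 'UTC'", read="postgres", write="bigquery")[0])
# Expected output: SELECT TIMESTAMP(DATETIME(ts, 'UTC'))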
def bracket_sql(self, expression: exp.Bracket) -> str:
    this = expression.this
    expressions = expression.expressions

    if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
        arg = expressions[0]
        if arg.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
            # BQ doesn't support bracket syntax with string values for structs
            return f"{self.sql(this)}.{arg.name}"

    expressions_sql = self.expressions(expression, flat=True)
    offset = expression.args.get("offset")

    if offset == 0:
        expressions_sql = f"OFFSET({expressions_sql})"
    elif offset == 1:
        expressions_sql = f"ORDINAL({expressions_sql})"
    elif offset is not None:
        self.unsupported(f"Unsupported array offset: {offset}")

    if expression.args.get("safe"):
        expressions_sql = f"SAFE_{expressions_sql}"

    return f"{self.sql(this)}[{expressions_sql}]"
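A round-trip sketch of the subscript handling (expected output in the comment): the offset=1 and safe=True args produced by the parser regenerate as SAFE_ORDINAL.

import sqlglot

print(sqlglot.transpile("SELECT arr[SAFE_ORDINAL(1)] FROM t", read="bigquery", write="bigquery")[0])
# Expected output: SELECT arr[SAFE_ORDINAL(1)] FROM t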
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql