sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql
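
# Usage sketch (illustrative, not part of the module): ClickHouse spells a
# parameterized quantile as `quantile(p)(x)`, which `_quantile_sql` above
# rebuilds from an `exp.Quantile` node. Exact output may vary by version:
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT quantile(0.9)(x) FROM t", read="clickhouse", write="clickhouse"
#   )[0]
#   # -> "SELECT quantile(0.9)(x) FROM t" (approximately)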
def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))
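
# A minimal standalone sketch (illustrative) of the offset-stripping step in
# `_timestrtotime_sql` above, mirroring its logic rather than calling it:
#
#   import datetime
#   ts = "2020-01-01 12:13:14.123-08:00"
#   date_time, rest = ts.split(".")
#   sep = "+" if "+" in rest else "-"
#   frac, _, offset = rest.partition(sep)
#   padded = f"{date_time}.{frac.ljust(6, '0')}{sep}{offset}"
#   datetime.datetime.fromisoformat(padded).replace(tzinfo=None).isoformat(sep=" ")
#   # -> '2020-01-01 12:13:14.123000'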

class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }
        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype
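
        # Usage sketch (illustrative, not part of the module). Per the comment in
        # `_parse_types` above, the nullability marker drives type transpilation in
        # both directions; outputs are approximate and may vary by version:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT CAST(x AS TEXT)", write="clickhouse")[0]
        #   # -> "SELECT CAST(x AS Nullable(String))"
        #   sqlglot.transpile("SELECT CAST(x AS UInt64)", read="clickhouse", write="duckdb")[0]
        #   # -> "SELECT CAST(x AS UBIGINT)"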
        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )
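
        # Usage sketch (illustrative, not part of the module). ClickHouse allows
        # scalar CTEs ("WITH <expression> AS <identifier>") and typed query
        # parameters, both parsed above; node shapes are approximate:
        #
        #   import sqlglot
        #   sqlglot.parse_one("WITH 1 AS x SELECT x", read="clickhouse")
        #   # -> the CTE node is built with scalar=True
        #   sqlglot.parse_one("SELECT {abc: UInt32}", read="clickhouse")
        #   # -> the projection is an exp.Placeholder with kind=UInt32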
        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None
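
        # Usage sketch (illustrative, not part of the module). `AGG_FUNC_MAPPING`
        # and `_parse_function` above split names like `sumIf` into a base
        # aggregate plus a combinator suffix; node shapes are approximate:
        #
        #   import sqlglot
        #   sqlglot.parse_one("SELECT sumIf(x, x > 0) FROM t", read="clickhouse")
        #   # -> the call parses as exp.CombinedAggFunc with parts=("sum", "If")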
        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()
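
        # Usage sketch (illustrative, not part of the module). The partition helpers
        # above cover ClickHouse's ALTER ... REPLACE PARTITION syntax; output is
        # approximate:
        #
        #   import sqlglot
        #   sql = "ALTER TABLE t2 REPLACE PARTITION ID '1' FROM t1"
        #   sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse")
        #   # -> round-trips via exp.ReplacePartition / exp.PartitionId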
        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
            # so "APPLY" shouldn't be parsed as <expr>'s alias.
            # However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this
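
    # Usage sketch (illustrative, not part of the module). The APPLY handling above
    # keeps "SELECT * APPLY(sum)" a query modifier instead of an alias; node shapes
    # are approximate:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT * APPLY(sum) FROM t", read="clickhouse")
    #   # -> the projection is wrapped in exp.Apply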
    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        VALUES_AS_TABLE = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: trim_sql,
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
        }
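
        # Usage sketch (illustrative, not part of the module). TRANSFORMS above
        # renames portable nodes into ClickHouse spellings; output is approximate:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT VARIANCE(x), STDDEV(x)", write="clickhouse")[0]
        #   # -> "SELECT varSamp(x), stddevSamp(x)"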
        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)
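
        # Usage sketch (illustrative, not part of the module), per the comment in
        # `trycast_sql` above; output is approximate:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT TRY_CAST(x AS TEXT)", write="clickhouse")[0]
        #   # -> "SELECT CAST(x AS Nullable(String))"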
        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)
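
        # Usage sketch (illustrative, not part of the module). `after_limit_modifiers`
        # above re-emits ClickHouse's trailing SETTINGS and FORMAT clauses; output is
        # approximate:
        #
        #   import sqlglot
        #   sql = "SELECT * FROM t LIMIT 1 SETTINGS max_threads = 4 FORMAT JSON"
        #   sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse")
        #   # -> the SETTINGS and FORMAT modifiers survive the round-trip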
        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
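
# Usage sketch (illustrative, not part of the module). PREWHERE is tokenized,
# parsed, and re-emitted by this dialect (see TokenType.PREWHERE and
# `prewhere_sql` above); output is approximate:
#
#   import sqlglot
#   sql = "SELECT * FROM t PREWHERE x > 0 WHERE y > 0"
#   sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse")
#   # -> PREWHERE is preserved as a distinct clause (exp.PreWhere)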
exp.DataType.Type.MULTILINESTRING: "MultiLineString", 907 exp.DataType.Type.POLYGON: "Polygon", 908 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 909 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 910 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 911 } 912 913 TRANSFORMS = { 914 **generator.Generator.TRANSFORMS, 915 exp.AnyValue: rename_func("any"), 916 exp.ApproxDistinct: rename_func("uniq"), 917 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 918 exp.ArraySize: rename_func("LENGTH"), 919 exp.ArraySum: rename_func("arraySum"), 920 exp.ArgMax: arg_max_or_min_no_count("argMax"), 921 exp.ArgMin: arg_max_or_min_no_count("argMin"), 922 exp.Array: inline_array_sql, 923 exp.CastToStrType: rename_func("CAST"), 924 exp.CountIf: rename_func("countIf"), 925 exp.CompressColumnConstraint: lambda self, 926 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 927 exp.ComputedColumnConstraint: lambda self, 928 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 929 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 930 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 931 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 932 exp.DateStrToDate: rename_func("toDate"), 933 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 934 exp.Explode: rename_func("arrayJoin"), 935 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 936 exp.IsNan: rename_func("isNaN"), 937 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 938 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 939 exp.JSONPathKey: json_path_key_only_name, 940 exp.JSONPathRoot: lambda *_: "", 941 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 942 exp.Nullif: rename_func("nullIf"), 943 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 944 exp.Pivot: no_pivot_sql, 945 exp.Quantile: _quantile_sql, 946 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 947 exp.Rand: rename_func("randCanonical"), 948 exp.StartsWith: rename_func("startsWith"), 949 exp.StrPosition: lambda self, e: self.func( 950 "position", e.this, e.args.get("substr"), e.args.get("position") 951 ), 952 exp.TimeToStr: lambda self, e: self.func( 953 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 954 ), 955 exp.TimeStrToTime: _timestrtotime_sql, 956 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 957 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 958 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 959 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 960 exp.MD5Digest: rename_func("MD5"), 961 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 962 exp.SHA: rename_func("SHA1"), 963 exp.SHA2: sha256_sql, 964 exp.UnixToTime: _unix_to_time_sql, 965 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 966 exp.Trim: trim_sql, 967 exp.Variance: rename_func("varSamp"), 968 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 969 exp.Stddev: rename_func("stddevSamp"), 970 exp.Chr: rename_func("CHAR"), 971 exp.Lag: lambda self, e: self.func( 972 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 973 ), 974 exp.Lead: lambda self, e: self.func( 975 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 976 ), 977 } 978 979 PROPERTIES_LOCATION = { 980 **generator.Generator.PROPERTIES_LOCATION, 981 
exp.OnCluster: exp.Properties.Location.POST_NAME, 982 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 983 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 984 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 985 } 986 987 # There's no list in docs, but it can be found in Clickhouse code 988 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 989 ON_CLUSTER_TARGETS = { 990 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 991 "DATABASE", 992 "TABLE", 993 "VIEW", 994 "DICTIONARY", 995 "INDEX", 996 "FUNCTION", 997 "NAMED COLLECTION", 998 } 999 1000 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1001 NON_NULLABLE_TYPES = { 1002 exp.DataType.Type.ARRAY, 1003 exp.DataType.Type.MAP, 1004 exp.DataType.Type.STRUCT, 1005 exp.DataType.Type.POINT, 1006 exp.DataType.Type.RING, 1007 exp.DataType.Type.LINESTRING, 1008 exp.DataType.Type.MULTILINESTRING, 1009 exp.DataType.Type.POLYGON, 1010 exp.DataType.Type.MULTIPOLYGON, 1011 } 1012 1013 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1014 strtodate_sql = self.function_fallback_sql(expression) 1015 1016 if not isinstance(expression.parent, exp.Cast): 1017 # StrToDate returns DATEs in other dialects (eg. postgres), so 1018 # this branch aims to improve the transpilation to clickhouse 1019 return f"CAST({strtodate_sql} AS DATE)" 1020 1021 return strtodate_sql 1022 1023 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1024 this = expression.this 1025 1026 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1027 return self.sql(this) 1028 1029 return super().cast_sql(expression, safe_prefix=safe_prefix) 1030 1031 def trycast_sql(self, expression: exp.TryCast) -> str: 1032 dtype = expression.to 1033 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1034 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1035 dtype.set("nullable", True) 1036 1037 return super().cast_sql(expression) 1038 1039 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1040 this = self.json_path_part(expression.this) 1041 return str(int(this) + 1) if is_int(this) else this 1042 1043 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1044 return f"AS {self.sql(expression, 'this')}" 1045 1046 def _any_to_has( 1047 self, 1048 expression: exp.EQ | exp.NEQ, 1049 default: t.Callable[[t.Any], str], 1050 prefix: str = "", 1051 ) -> str: 1052 if isinstance(expression.left, exp.Any): 1053 arr = expression.left 1054 this = expression.right 1055 elif isinstance(expression.right, exp.Any): 1056 arr = expression.right 1057 this = expression.left 1058 else: 1059 return default(expression) 1060 1061 return prefix + self.func("has", arr.this.unnest(), this) 1062 1063 def eq_sql(self, expression: exp.EQ) -> str: 1064 return self._any_to_has(expression, super().eq_sql) 1065 1066 def neq_sql(self, expression: exp.NEQ) -> str: 1067 return self._any_to_has(expression, super().neq_sql, "NOT ") 1068 1069 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1070 # Manually add a flag to make the search case-insensitive 1071 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1072 return self.func("match", expression.this, regex) 1073 1074 def datatype_sql(self, expression: exp.DataType) -> str: 1075 # String is the standard ClickHouse type, every other variant is just an alias. 1076 # Additionally, any supplied length parameter will be ignored. 
1077 # 1078 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1079 if expression.this in self.STRING_TYPE_MAPPING: 1080 dtype = "String" 1081 else: 1082 dtype = super().datatype_sql(expression) 1083 1084 # This section changes the type to `Nullable(...)` if the following conditions hold: 1085 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1086 # and change their semantics 1087 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1088 # constraint: "Type of Map key must be a type, that can be represented by integer or 1089 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1090 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1091 parent = expression.parent 1092 nullable = expression.args.get("nullable") 1093 if nullable is True or ( 1094 nullable is None 1095 and not ( 1096 isinstance(parent, exp.DataType) 1097 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1098 and expression.index in (None, 0) 1099 ) 1100 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1101 ): 1102 dtype = f"Nullable({dtype})" 1103 1104 return dtype 1105 1106 def cte_sql(self, expression: exp.CTE) -> str: 1107 if expression.args.get("scalar"): 1108 this = self.sql(expression, "this") 1109 alias = self.sql(expression, "alias") 1110 return f"{this} AS {alias}" 1111 1112 return super().cte_sql(expression) 1113 1114 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1115 return super().after_limit_modifiers(expression) + [ 1116 ( 1117 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1118 if expression.args.get("settings") 1119 else "" 1120 ), 1121 ( 1122 self.seg("FORMAT ") + self.sql(expression, "format") 1123 if expression.args.get("format") 1124 else "" 1125 ), 1126 ] 1127 1128 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1129 params = self.expressions(expression, key="params", flat=True) 1130 return self.func(expression.name, *expression.expressions) + f"({params})" 1131 1132 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1133 return self.func(expression.name, *expression.expressions) 1134 1135 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1136 return self.anonymousaggfunc_sql(expression) 1137 1138 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1139 return self.parameterizedagg_sql(expression) 1140 1141 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1142 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1143 1144 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1145 return f"ON CLUSTER {self.sql(expression, 'this')}" 1146 1147 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1148 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1149 exp.Properties.Location.POST_NAME 1150 ): 1151 this_name = self.sql( 1152 expression.this if isinstance(expression.this, exp.Schema) else expression, 1153 "this", 1154 ) 1155 this_properties = " ".join( 1156 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1157 ) 1158 this_schema = self.schema_columns_sql(expression.this) 1159 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1160 1161 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1162 1163 return super().createable_sql(expression, locations) 
1164 1165 def create_sql(self, expression: exp.Create) -> str: 1166 # The comment property comes last in CTAS statements, i.e. after the query 1167 query = expression.expression 1168 if isinstance(query, exp.Query): 1169 comment_prop = expression.find(exp.SchemaCommentProperty) 1170 if comment_prop: 1171 comment_prop.pop() 1172 query.replace(exp.paren(query)) 1173 else: 1174 comment_prop = None 1175 1176 create_sql = super().create_sql(expression) 1177 1178 comment_sql = self.sql(comment_prop) 1179 comment_sql = f" {comment_sql}" if comment_sql else "" 1180 1181 return f"{create_sql}{comment_sql}" 1182 1183 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1184 this = self.indent(self.sql(expression, "this")) 1185 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1186 1187 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1188 this = self.sql(expression, "this") 1189 this = f" {this}" if this else "" 1190 expr = self.sql(expression, "expression") 1191 expr = f" {expr}" if expr else "" 1192 index_type = self.sql(expression, "index_type") 1193 index_type = f" TYPE {index_type}" if index_type else "" 1194 granularity = self.sql(expression, "granularity") 1195 granularity = f" GRANULARITY {granularity}" if granularity else "" 1196 1197 return f"INDEX{this}{expr}{index_type}{granularity}" 1198 1199 def partition_sql(self, expression: exp.Partition) -> str: 1200 return f"PARTITION {self.expressions(expression, flat=True)}" 1201 1202 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1203 return f"ID {self.sql(expression.this)}" 1204 1205 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1206 return ( 1207 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1208 ) 1209 1210 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1211 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
NORMALIZE_FUNCTIONS

Determines how function names are going to be normalized.

Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
NULL_ORDERING

Default NULL ordering method to use if not explicitly set.

Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
LOG_BASE_FIRST

Whether the base comes first in the LOG function.

Possible values: True, False, None (two arguments are not supported by LOG)
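Relatedly, the ClickHouse parser sets LOG_DEFAULTS_TO_LN (visible in the Parser source earlier on this page), so a single-argument LOG is read as the natural logarithm. A hedged sketch:

    from sqlglot import exp, parse_one

    # With LOG_DEFAULTS_TO_LN = True, one-argument LOG parses as Ln.
    ast = parse_one("SELECT LOG(x) FROM t", read="clickhouse")
    print(ast.find(exp.Ln) is not None)  # expected (illustrative): True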
FORCE_EARLY_ALIAS_REF_EXPANSION

Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).

For example:

    WITH data AS (
      SELECT 1 AS id, 2 AS my_id
    )
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
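To see the ClickHouse behavior end to end, the sketch below runs sqlglot's qualification pass over the example query; it is illustrative only, and the exact qualified output may differ between sqlglot versions.

    from sqlglot import parse_one
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1
    """

    # With FORCE_EARLY_ALIAS_REF_EXPANSION = True, the `my_id` alias
    # references are expanded before columns are qualified, so they
    # resolve to the underlying `id` column.
    print(qualify(parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))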
NORMALIZATION_STRATEGY

Specifies the strategy according to which identifiers should be normalized.
UNESCAPED_SEQUENCES

Mapping of an escaped sequence (e.g. "\n") to its unescaped version (the corresponding literal character).
CREATABLE_KIND_MAPPING

Helper for dialects that use a different name for the same creatable kind. For example, the ClickHouse equivalent of CREATE SCHEMA is CREATE DATABASE.
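A hedged round-trip sketch of this mapping (exact output depends on the sqlglot version and target dialect):

    import sqlglot

    # ClickHouse's CREATE DATABASE is modeled as the canonical SCHEMA kind,
    # so it transpiles to CREATE SCHEMA in dialects that use that spelling.
    print(sqlglot.transpile("CREATE DATABASE db", read="clickhouse", write="duckdb")[0])
    # expected (illustrative): CREATE SCHEMA db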
SET_OP_DISTINCT_BY_DEFAULT

Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
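For ClickHouse, a bare UNION carries no implicit modifier, so DISTINCT or ALL is spelled out. A small hedged sketch inspecting the parsed flag:

    from sqlglot import exp, parse_one

    # With an explicit ALL, the set operation records distinct=False.
    union = parse_one("SELECT 1 UNION ALL SELECT 2", read="clickhouse")
    print(isinstance(union, exp.Union), union.args.get("distinct"))
    # expected (illustrative): True False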
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
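Several of the inherited helpers listed above (parse, generate, transpile, tokenize) are the usual entry points for this dialect. A minimal hedged sketch, with illustrative table and column names:

    import sqlglot
    from sqlglot.dialects.dialect import Dialect

    # Look up the dialect by name; this raises if it isn't registered.
    clickhouse = Dialect.get_or_raise("clickhouse")

    # Parse ClickHouse SQL (including the FINAL table modifier) and
    # re-generate it through the same dialect.
    ast = sqlglot.parse_one("SELECT toString(x) FROM t FINAL", read="clickhouse")
    print(ast.sql(dialect="clickhouse"))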
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
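A short hedged sketch of the tokenizer settings in action ("#" line comments and 0x-prefixed hex literals are ClickHouse-specific; the printed token kinds are illustrative):

    from sqlglot.dialects.dialect import Dialect

    clickhouse = Dialect.get_or_raise("clickhouse")

    # "0x1F" is tokenized as a hex string and "#" starts a line comment.
    for token in clickhouse.tokenize("SELECT 0x1F # trailing comment"):
        print(token.token_type, token.text)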
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
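As an example of ClickHouse-specific parsing, query parameter placeholders of the form {name: Type} (handled by _parse_query_parameter in the source earlier on this page) round-trip cleanly. A hedged sketch:

    from sqlglot import parse_one

    # {abc: UInt32} is a ClickHouse query parameter placeholder.
    ast = parse_one("SELECT {abc: UInt32}", read="clickhouse")
    print(ast.sql(dialect="clickhouse"))
    # expected (illustrative): SELECT {abc: UInt32}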
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
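The parser also wires up ClickHouse-only query modifiers such as SETTINGS and FORMAT (see QUERY_MODIFIER_PARSERS in the source earlier on this page). A hedged round-trip sketch, with illustrative table and setting names:

    from sqlglot import parse_one

    sql = "SELECT * FROM t FINAL SETTINGS max_threads = 8 FORMAT JSONEachRow"

    # SETTINGS and FORMAT are parsed as query modifiers and re-emitted
    # after LIMIT (see after_limit_modifiers in the Generator).
    print(parse_one(sql, read="clickhouse").sql(dialect="clickhouse"))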
832 class Generator(generator.Generator): 833 QUERY_HINTS = False 834 STRUCT_DELIMITER = ("(", ")") 835 NVL2_SUPPORTED = False 836 TABLESAMPLE_REQUIRES_PARENS = False 837 TABLESAMPLE_SIZE_IS_ROWS = False 838 TABLESAMPLE_KEYWORDS = "SAMPLE" 839 LAST_DAY_SUPPORTS_DATE_PART = False 840 CAN_IMPLEMENT_ARRAY_ANY = True 841 SUPPORTS_TO_NUMBER = False 842 JOIN_HINTS = False 843 TABLE_HINTS = False 844 GROUPINGS_SEP = "" 845 SET_OP_MODIFIERS = False 846 SUPPORTS_TABLE_ALIAS_COLUMNS = False 847 VALUES_AS_TABLE = False 848 849 STRING_TYPE_MAPPING = { 850 exp.DataType.Type.CHAR: "String", 851 exp.DataType.Type.LONGBLOB: "String", 852 exp.DataType.Type.LONGTEXT: "String", 853 exp.DataType.Type.MEDIUMBLOB: "String", 854 exp.DataType.Type.MEDIUMTEXT: "String", 855 exp.DataType.Type.TINYBLOB: "String", 856 exp.DataType.Type.TINYTEXT: "String", 857 exp.DataType.Type.TEXT: "String", 858 exp.DataType.Type.VARBINARY: "String", 859 exp.DataType.Type.VARCHAR: "String", 860 } 861 862 SUPPORTED_JSON_PATH_PARTS = { 863 exp.JSONPathKey, 864 exp.JSONPathRoot, 865 exp.JSONPathSubscript, 866 } 867 868 TYPE_MAPPING = { 869 **generator.Generator.TYPE_MAPPING, 870 **STRING_TYPE_MAPPING, 871 exp.DataType.Type.ARRAY: "Array", 872 exp.DataType.Type.BOOLEAN: "Bool", 873 exp.DataType.Type.BIGINT: "Int64", 874 exp.DataType.Type.DATE32: "Date32", 875 exp.DataType.Type.DATETIME: "DateTime", 876 exp.DataType.Type.DATETIME64: "DateTime64", 877 exp.DataType.Type.TIMESTAMP: "DateTime", 878 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 879 exp.DataType.Type.DOUBLE: "Float64", 880 exp.DataType.Type.ENUM: "Enum", 881 exp.DataType.Type.ENUM8: "Enum8", 882 exp.DataType.Type.ENUM16: "Enum16", 883 exp.DataType.Type.FIXEDSTRING: "FixedString", 884 exp.DataType.Type.FLOAT: "Float32", 885 exp.DataType.Type.INT: "Int32", 886 exp.DataType.Type.MEDIUMINT: "Int32", 887 exp.DataType.Type.INT128: "Int128", 888 exp.DataType.Type.INT256: "Int256", 889 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 890 exp.DataType.Type.MAP: "Map", 891 exp.DataType.Type.NESTED: "Nested", 892 exp.DataType.Type.SMALLINT: "Int16", 893 exp.DataType.Type.STRUCT: "Tuple", 894 exp.DataType.Type.TINYINT: "Int8", 895 exp.DataType.Type.UBIGINT: "UInt64", 896 exp.DataType.Type.UINT: "UInt32", 897 exp.DataType.Type.UINT128: "UInt128", 898 exp.DataType.Type.UINT256: "UInt256", 899 exp.DataType.Type.USMALLINT: "UInt16", 900 exp.DataType.Type.UTINYINT: "UInt8", 901 exp.DataType.Type.IPV4: "IPv4", 902 exp.DataType.Type.IPV6: "IPv6", 903 exp.DataType.Type.POINT: "Point", 904 exp.DataType.Type.RING: "Ring", 905 exp.DataType.Type.LINESTRING: "LineString", 906 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 907 exp.DataType.Type.POLYGON: "Polygon", 908 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 909 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 910 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 911 } 912 913 TRANSFORMS = { 914 **generator.Generator.TRANSFORMS, 915 exp.AnyValue: rename_func("any"), 916 exp.ApproxDistinct: rename_func("uniq"), 917 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 918 exp.ArraySize: rename_func("LENGTH"), 919 exp.ArraySum: rename_func("arraySum"), 920 exp.ArgMax: arg_max_or_min_no_count("argMax"), 921 exp.ArgMin: arg_max_or_min_no_count("argMin"), 922 exp.Array: inline_array_sql, 923 exp.CastToStrType: rename_func("CAST"), 924 exp.CountIf: rename_func("countIf"), 925 exp.CompressColumnConstraint: lambda self, 926 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 927 
exp.ComputedColumnConstraint: lambda self, 928 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 929 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 930 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 931 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 932 exp.DateStrToDate: rename_func("toDate"), 933 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 934 exp.Explode: rename_func("arrayJoin"), 935 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 936 exp.IsNan: rename_func("isNaN"), 937 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 938 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 939 exp.JSONPathKey: json_path_key_only_name, 940 exp.JSONPathRoot: lambda *_: "", 941 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 942 exp.Nullif: rename_func("nullIf"), 943 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 944 exp.Pivot: no_pivot_sql, 945 exp.Quantile: _quantile_sql, 946 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 947 exp.Rand: rename_func("randCanonical"), 948 exp.StartsWith: rename_func("startsWith"), 949 exp.StrPosition: lambda self, e: self.func( 950 "position", e.this, e.args.get("substr"), e.args.get("position") 951 ), 952 exp.TimeToStr: lambda self, e: self.func( 953 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 954 ), 955 exp.TimeStrToTime: _timestrtotime_sql, 956 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 957 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 958 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 959 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 960 exp.MD5Digest: rename_func("MD5"), 961 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 962 exp.SHA: rename_func("SHA1"), 963 exp.SHA2: sha256_sql, 964 exp.UnixToTime: _unix_to_time_sql, 965 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 966 exp.Trim: trim_sql, 967 exp.Variance: rename_func("varSamp"), 968 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 969 exp.Stddev: rename_func("stddevSamp"), 970 exp.Chr: rename_func("CHAR"), 971 exp.Lag: lambda self, e: self.func( 972 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 973 ), 974 exp.Lead: lambda self, e: self.func( 975 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 976 ), 977 } 978 979 PROPERTIES_LOCATION = { 980 **generator.Generator.PROPERTIES_LOCATION, 981 exp.OnCluster: exp.Properties.Location.POST_NAME, 982 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 983 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 984 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 985 } 986 987 # There's no list in docs, but it can be found in Clickhouse code 988 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 989 ON_CLUSTER_TARGETS = { 990 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 991 "DATABASE", 992 "TABLE", 993 "VIEW", 994 "DICTIONARY", 995 "INDEX", 996 "FUNCTION", 997 "NAMED COLLECTION", 998 } 999 1000 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1001 NON_NULLABLE_TYPES = { 1002 exp.DataType.Type.ARRAY, 1003 exp.DataType.Type.MAP, 1004 exp.DataType.Type.STRUCT, 1005 exp.DataType.Type.POINT, 1006 exp.DataType.Type.RING, 1007 exp.DataType.Type.LINESTRING, 1008 exp.DataType.Type.MULTILINESTRING, 1009 exp.DataType.Type.POLYGON, 
    exp.DataType.Type.MULTIPOLYGON,
}

def strtodate_sql(self, expression: exp.StrToDate) -> str:
    strtodate_sql = self.function_fallback_sql(expression)

    if not isinstance(expression.parent, exp.Cast):
        # StrToDate returns DATEs in other dialects (eg. postgres), so
        # this branch aims to improve the transpilation to clickhouse
        return f"CAST({strtodate_sql} AS DATE)"

    return strtodate_sql

def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    this = expression.this

    if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
        return self.sql(this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)

def trycast_sql(self, expression: exp.TryCast) -> str:
    dtype = expression.to
    if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
        # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
        dtype.set("nullable", True)

    return super().cast_sql(expression)

def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
    this = self.json_path_part(expression.this)
    return str(int(this) + 1) if is_int(this) else this

def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
    return f"AS {self.sql(expression, 'this')}"

def _any_to_has(
    self,
    expression: exp.EQ | exp.NEQ,
    default: t.Callable[[t.Any], str],
    prefix: str = "",
) -> str:
    if isinstance(expression.left, exp.Any):
        arr = expression.left
        this = expression.right
    elif isinstance(expression.right, exp.Any):
        arr = expression.right
        this = expression.left
    else:
        return default(expression)

    return prefix + self.func("has", arr.this.unnest(), this)

def eq_sql(self, expression: exp.EQ) -> str:
    return self._any_to_has(expression, super().eq_sql)

def neq_sql(self, expression: exp.NEQ) -> str:
    return self._any_to_has(expression, super().neq_sql, "NOT ")

def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
    # Manually add a flag to make the search case-insensitive
    regex = self.func("CONCAT", "'(?i)'", expression.expression)
    return self.func("match", expression.this, regex)

def datatype_sql(self, expression: exp.DataType) -> str:
    # String is the standard ClickHouse type, every other variant is just an alias.
    # Additionally, any supplied length parameter will be ignored.
    #
    # https://clickhouse.com/docs/en/sql-reference/data-types/string
    if expression.this in self.STRING_TYPE_MAPPING:
        dtype = "String"
    else:
        dtype = super().datatype_sql(expression)

    # This section changes the type to `Nullable(...)` if the following conditions hold:
    # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
    #   and change their semantics
    # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
    #   constraint: "Type of Map key must be a type, that can be represented by integer or
    #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
    # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
    parent = expression.parent
    nullable = expression.args.get("nullable")
    if nullable is True or (
        nullable is None
        and not (
            isinstance(parent, exp.DataType)
            and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
            and expression.index in (None, 0)
        )
        and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
    ):
        dtype = f"Nullable({dtype})"

    return dtype

def cte_sql(self, expression: exp.CTE) -> str:
    if expression.args.get("scalar"):
        this = self.sql(expression, "this")
        alias = self.sql(expression, "alias")
        return f"{this} AS {alias}"

    return super().cte_sql(expression)

def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
    return super().after_limit_modifiers(expression) + [
        (
            self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
            if expression.args.get("settings")
            else ""
        ),
        (
            self.seg("FORMAT ") + self.sql(expression, "format")
            if expression.args.get("format")
            else ""
        ),
    ]

def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
    params = self.expressions(expression, key="params", flat=True)
    return self.func(expression.name, *expression.expressions) + f"({params})"

def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
    return self.func(expression.name, *expression.expressions)

def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
    return self.anonymousaggfunc_sql(expression)

def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
    return self.parameterizedagg_sql(expression)

def placeholder_sql(self, expression: exp.Placeholder) -> str:
    return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

def oncluster_sql(self, expression: exp.OnCluster) -> str:
    return f"ON CLUSTER {self.sql(expression, 'this')}"

def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
        exp.Properties.Location.POST_NAME
    ):
        this_name = self.sql(
            expression.this if isinstance(expression.this, exp.Schema) else expression,
            "this",
        )
        this_properties = " ".join(
            [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
        )
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        return f"{this_name}{self.sep()}{this_properties}{this_schema}"

    return super().createable_sql(expression, locations)

def create_sql(self, expression: exp.Create) -> str:
    # The comment property comes last in CTAS statements, i.e. after the query
    query = expression.expression
    if isinstance(query, exp.Query):
        comment_prop = expression.find(exp.SchemaCommentProperty)
        if comment_prop:
            comment_prop.pop()
            query.replace(exp.paren(query))
    else:
        comment_prop = None

    create_sql = super().create_sql(expression)

    comment_sql = self.sql(comment_prop)
    comment_sql = f" {comment_sql}" if comment_sql else ""

    return f"{create_sql}{comment_sql}"

def prewhere_sql(self, expression: exp.PreWhere) -> str:
    this = self.indent(self.sql(expression, "this"))
    return f"{self.seg('PREWHERE')}{self.sep()}{this}"

def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    this = self.sql(expression, "this")
    this = f" {this}" if this else ""
    expr = self.sql(expression, "expression")
    expr = f" {expr}" if expr else ""
    index_type = self.sql(expression, "index_type")
    index_type = f" TYPE {index_type}" if index_type else ""
    granularity = self.sql(expression, "granularity")
    granularity = f" GRANULARITY {granularity}" if granularity else ""

    return f"INDEX{this}{expr}{index_type}{granularity}"

def partition_sql(self, expression: exp.Partition) -> str:
    return f"PARTITION {self.expressions(expression, flat=True)}"

def partitionid_sql(self, expression: exp.PartitionId) -> str:
    return f"ID {self.sql(expression.this)}"

def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
    return f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"

def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
    return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
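The `_any_to_has` helper above rewrites ANY-style (in)equality into ClickHouse's `has` function, since `x = ANY(arr)` has no direct ClickHouse spelling. A minimal sketch of the observable behavior; the exact output may vary across sqlglot versions:

import sqlglot

# `x = ANY(arr)` becomes has(arr, x); the NEQ form gets a NOT prefix.
print(sqlglot.transpile("SELECT 1 = ANY(arr)", read="postgres", write="clickhouse")[0])
# e.g. SELECT has(arr, 1)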
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
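As a quick illustration, these options can be passed straight through sqlglot.transpile (a sketch; the exact pretty-printed layout depends on the sqlglot version):

import sqlglot

sql = "SELECT id, SUM(amount) AS total FROM sales GROUP BY id"

# pretty=True formats the output; identify=True forces identifier quoting.
print(sqlglot.transpile(sql, write="clickhouse", pretty=True, identify=True)[0])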
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    strtodate_sql = self.function_fallback_sql(expression)

    if not isinstance(expression.parent, exp.Cast):
        # StrToDate returns DATEs in other dialects (eg. postgres), so
        # this branch aims to improve the transpilation to clickhouse
        return f"CAST({strtodate_sql} AS DATE)"

    return strtodate_sql
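For example, transpiling MySQL's STR_TO_DATE, which returns a DATE in that dialect, adds the explicit cast when there is no enclosing cast (output is indicative):

import sqlglot

sql = "SELECT STR_TO_DATE('2020-01-01', '%Y-%m-%d')"
print(sqlglot.transpile(sql, read="mysql", write="clickhouse")[0])
# e.g. SELECT CAST(STR_TO_DATE('2020-01-01', '%Y-%m-%d') AS DATE)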
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    this = expression.this

    if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
        return self.sql(this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
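This avoids stacking a redundant cast: the dialect's STR_TO_DATE parser wraps the function in CAST(... AS DATETIME) internally, and cast_sql drops that wrapper on the way back out, so the query round-trips cleanly (a sketch; output is indicative):

import sqlglot

sql = "SELECT STR_TO_DATE(x, '%Y-%m-%d')"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])
# e.g. SELECT STR_TO_DATE(x, '%Y-%m-%d')  -- no extra CAST accumulates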
def trycast_sql(self, expression: exp.TryCast) -> str:
    dtype = expression.to
    if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
        # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
        dtype.set("nullable", True)

    return super().cast_sql(expression)
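For instance, a TRY_CAST read from another dialect becomes a plain cast to a Nullable type, which yields NULL on conversion failure (indicative output):

import sqlglot

print(sqlglot.transpile("SELECT TRY_CAST(a AS TEXT)", read="duckdb", write="clickhouse")[0])
# e.g. SELECT CAST(a AS Nullable(String))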
def datatype_sql(self, expression: exp.DataType) -> str:
    # String is the standard ClickHouse type, every other variant is just an alias.
    # Additionally, any supplied length parameter will be ignored.
    #
    # https://clickhouse.com/docs/en/sql-reference/data-types/string
    if expression.this in self.STRING_TYPE_MAPPING:
        dtype = "String"
    else:
        dtype = super().datatype_sql(expression)

    # This section changes the type to `Nullable(...)` if the following conditions hold:
    # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
    #   and change their semantics
    # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
    #   constraint: "Type of Map key must be a type, that can be represented by integer or
    #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
    # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
    parent = expression.parent
    nullable = expression.args.get("nullable")
    if nullable is True or (
        nullable is None
        and not (
            isinstance(parent, exp.DataType)
            and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
            and expression.index in (None, 0)
        )
        and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
    ):
        dtype = f"Nullable({dtype})"

    return dtype
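A small sketch of both rules, building the data types directly; the outputs are indicative, and passing nullable=False through exp.DataType.build is assumed to set the expression's nullable arg:

from sqlglot import exp

# All string flavors collapse to String and the length is dropped; a type
# not explicitly marked non-nullable gets wrapped in Nullable(...).
print(exp.DataType.build("VARCHAR(255)").sql(dialect="clickhouse"))
# e.g. Nullable(String)
print(exp.DataType.build("TEXT", nullable=False).sql(dialect="clickhouse"))
# e.g. String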
def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
    return super().after_limit_modifiers(expression) + [
        (
            self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
            if expression.args.get("settings")
            else ""
        ),
        (
            self.seg("FORMAT ") + self.sql(expression, "format")
            if expression.args.get("format")
            else ""
        ),
    ]
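In other words, SETTINGS and FORMAT are emitted after any LIMIT clause, matching where ClickHouse expects them (a sketch; output is indicative):

import sqlglot

sql = "SELECT * FROM t LIMIT 10 SETTINGS max_threads = 8 FORMAT JSONEachRow"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])
# e.g. SELECT * FROM t LIMIT 10 SETTINGS max_threads = 8 FORMAT JSONEachRow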
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
        exp.Properties.Location.POST_NAME
    ):
        this_name = self.sql(
            expression.this if isinstance(expression.this, exp.Schema) else expression,
            "this",
        )
        this_properties = " ".join(
            [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
        )
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        return f"{this_name}{self.sep()}{this_properties}{this_schema}"

    return super().createable_sql(expression, locations)
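This places POST_NAME properties such as ON CLUSTER between the object name and the column schema (a sketch; output is indicative):

import sqlglot

sql = "CREATE TABLE foo ON CLUSTER c (x String)"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])
# e.g. CREATE TABLE foo ON CLUSTER c (x String)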
def create_sql(self, expression: exp.Create) -> str:
    # The comment property comes last in CTAS statements, i.e. after the query
    query = expression.expression
    if isinstance(query, exp.Query):
        comment_prop = expression.find(exp.SchemaCommentProperty)
        if comment_prop:
            comment_prop.pop()
            query.replace(exp.paren(query))
    else:
        comment_prop = None

    create_sql = super().create_sql(expression)

    comment_sql = self.sql(comment_prop)
    comment_sql = f" {comment_sql}" if comment_sql else ""

    return f"{create_sql}{comment_sql}"
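For example, a CTAS read from MySQL, where COMMENT precedes AS, is assumed here to be rearranged so the comment trails the parenthesized query (indicative output):

import sqlglot

sql = "CREATE TABLE t COMMENT 'hello' AS SELECT 1"
print(sqlglot.transpile(sql, read="mysql", write="clickhouse")[0])
# e.g. CREATE TABLE t AS (SELECT 1) COMMENT 'hello'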
def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    this = self.sql(expression, "this")
    this = f" {this}" if this else ""
    expr = self.sql(expression, "expression")
    expr = f" {expr}" if expr else ""
    index_type = self.sql(expression, "index_type")
    index_type = f" TYPE {index_type}" if index_type else ""
    granularity = self.sql(expression, "granularity")
    granularity = f" GRANULARITY {granularity}" if granularity else ""

    return f"INDEX{this}{expr}{index_type}{granularity}"
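A sketch with a data-skipping index, which keeps its TYPE and GRANULARITY clauses intact through a round trip (output is indicative):

import sqlglot

sql = "CREATE TABLE t (a String, INDEX idx a TYPE bloom_filter GRANULARITY 4) ENGINE=MergeTree ORDER BY tuple()"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])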
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql