diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 285eec505..4f62ab8a1 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -719,6 +719,8 @@ impl fmt::Display for DataType { ArrayElemTypeDef::SquareBracket(t, Some(size)) => write!(f, "{t}[{size}]"), ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"), ArrayElemTypeDef::Parenthesis(t) => write!(f, "Array({t})"), + ArrayElemTypeDef::Keyword(t, None) => write!(f, "{t} ARRAY"), + ArrayElemTypeDef::Keyword(t, Some(size)) => write!(f, "{t} ARRAY[{size}]"), }, DataType::Custom(ty, modifiers) => { if modifiers.is_empty() { @@ -1146,6 +1148,8 @@ pub enum ArrayElemTypeDef { SquareBracket(Box<DataType>, Option<u64>), /// Parenthesis style, e.g. `Array(Int64)`. Parenthesis(Box<DataType>), + /// Keyword style with an optional size, e.g. `INT ARRAY` or `INT ARRAY[4]`. + Keyword(Box<DataType>, Option<u64>), } /// Represents different types of geometric shapes which are commonly used in diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 46826d7be..9d939430f 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1088,14 +1088,10 @@ pub enum Expr { kind: CastKind, /// Expression being cast. expr: Box<Expr>, - /// Target data type. + /// Target data type. A trailing `ARRAY` keyword (e.g. + /// `CAST(... AS UNSIGNED ARRAY)`) is captured as [`DataType::Array`] + /// with [`ArrayElemTypeDef::Keyword`]. data_type: DataType, - /// [MySQL] allows CAST(... AS type ARRAY) in functional index definitions for InnoDB - /// multi-valued indices. It's not really a datatype, and is only allowed in `CAST` in key - /// specifications, so it's a flag here. 
- /// - /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html#function_cast - array: bool, /// Optional CAST(string_expression AS type FORMAT format_string_expression) as used by [BigQuery] /// /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax @@ -1957,14 +1953,10 @@ impl fmt::Display for Expr { kind, expr, data_type, - array, format, } => match kind { CastKind::Cast => { write!(f, "CAST({expr} AS {data_type}")?; - if *array { - write!(f, " ARRAY")?; - } if let Some(format) = format { write!(f, " FORMAT {format}")?; } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index e309705c8..43f91a24e 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1591,7 +1591,6 @@ impl Spanned for Expr { kind: _, expr, data_type: _, - array: _, format: _, } => expr.span(), Expr::AtTimeZone { diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 9b2ede40d..6c04eef73 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1398,6 +1398,13 @@ pub trait Dialect: Debug + Any { fn supports_array_typedef_with_brackets(&self) -> bool { false } + /// Returns true if the dialect supports array type definition with the + /// `ARRAY` keyword and an optional size. For example: + /// ```CREATE TABLE my_table (arr1 INT ARRAY, arr2 INT ARRAY[3])``` + /// ```SELECT x::INT ARRAY[3]``` + fn supports_array_typedef_with_keyword(&self) -> bool { + false + } /// Returns true if the dialect supports geometric types. 
/// /// Postgres: diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index c40d6d674..bd4604f35 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -274,6 +274,11 @@ impl Dialect for PostgreSqlDialect { true } + /// See: <https://www.postgresql.org/docs/current/arrays.html#ARRAYS-DECLARATION> + fn supports_array_typedef_with_keyword(&self) -> bool { + true + } + fn supports_geometric_types(&self) -> bool { true } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 91ac386ae..1c913a66f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1765,7 +1765,6 @@ impl<'a> Parser<'a> { kind: CastKind::Cast, expr: Box::new(parser.parse_expr()?), data_type: DataType::Binary(None), - array: false, format: None, }) } @@ -2807,15 +2806,19 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; self.expect_keyword_is(Keyword::AS)?; - let data_type = self.parse_data_type()?; - let array = self.parse_keyword(Keyword::ARRAY); + let mut data_type = self.parse_data_type()?; + // A trailing `ARRAY` keyword makes the target an array type, e.g. MySQL's + // `CAST(... AS UNSIGNED ARRAY)`. PostgreSQL already consumes it while + // parsing the data type, so the guard avoids wrapping it twice. 
+ if !matches!(data_type, DataType::Array(_)) && self.parse_keyword(Keyword::ARRAY) { + data_type = DataType::Array(ArrayElemTypeDef::Keyword(Box::new(data_type), None)); + } let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::Cast { kind, expr: Box::new(expr), data_type, - array, format, }) } @@ -4097,7 +4100,6 @@ impl<'a> Parser<'a> { kind: CastKind::DoubleColon, expr: Box::new(expr), data_type: self.parse_data_type()?, - array: false, format: None, }) } else if Token::ExclamationMark == *tok && self.dialect.supports_factorial_operator() { @@ -4345,7 +4347,6 @@ impl<'a> Parser<'a> { kind: CastKind::DoubleColon, expr: Box::new(expr), data_type: self.parse_data_type()?, - array: false, format: None, }) } @@ -12825,6 +12826,22 @@ impl<'a> Parser<'a> { data = DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(data), size)) } } + + // Keyword form, e.g. `INT ARRAY` or `INT ARRAY[3]`. It is one-dimensional, + // so only a single optional size is accepted (multidimensional arrays use + // the bracket form above). 
+ if self.dialect.supports_array_typedef_with_keyword() && self.parse_keyword(Keyword::ARRAY) + { + let size = if self.consume_token(&Token::LBracket) { + let size = self.maybe_parse(|p| p.parse_literal_uint())?; + self.expect_token(&Token::RBracket)?; + size + } else { + None + }; + data = DataType::Array(ArrayElemTypeDef::Keyword(Box::new(data), size)); + } + Ok((data, trailing_bracket)) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f470b93ca..6e9676c18 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3146,7 +3146,6 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), - array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3159,7 +3158,6 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::TinyInt(None), - array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3191,7 +3189,6 @@ fn parse_cast() { length: 50, unit: None, })), - array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3204,7 +3201,6 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(None), - array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3217,7 +3213,6 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(Some(50)), - array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3230,7 +3225,6 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Binary(Some(50)), - array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3243,7 +3237,6 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: 
DataType::Varbinary(Some(BinaryLength::IntegerLength { length: 50 })), - array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3256,7 +3249,6 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(None), - array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3269,7 +3261,6 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(Some(50)), - array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3282,7 +3273,6 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("details"))), data_type: DataType::JSONB, - array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3298,7 +3288,6 @@ fn parse_try_cast() { kind: CastKind::TryCast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), - array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -6641,7 +6630,6 @@ fn interval_disallow_interval_expr_double_colon() { fractional_seconds_precision: None, })), data_type: DataType::Text, - array: false, format: None, } ) @@ -6659,7 +6647,6 @@ fn parse_text_type_modifier_double_colon_cast() { ObjectName::from(vec![Ident::new("TEXT")]), vec!["16777216".to_string()] ), - array: false, format: None, } ); @@ -9412,7 +9399,6 @@ fn parse_double_colon_cast_at_timezone() { .with_empty_span() )), data_type: DataType::Timestamp(None, TimezoneInfo::None), - array: false, format: None }), time_zone: Box::new(Expr::Value( @@ -13812,7 +13798,6 @@ fn test_dictionary_syntax() { (Value::SingleQuotedString("2023-04-01".to_owned())).with_empty_span(), )), data_type: DataType::Timestamp(None, TimezoneInfo::None), - array: false, format: None, }), }, @@ -13824,7 +13809,6 @@ fn test_dictionary_syntax() { (Value::SingleQuotedString("2023-04-05".to_owned())).with_empty_span(), )), 
data_type: DataType::Timestamp(None, TimezoneInfo::None), - array: false, format: None, }), }, @@ -14121,7 +14105,6 @@ fn test_extract_seconds_ok() { fields: None, precision: None }, - array: false, format: None, }), } @@ -14152,7 +14135,6 @@ fn test_extract_seconds_ok() { fields: None, precision: None, }, - array: false, format: None, }), })], @@ -14210,7 +14192,6 @@ fn test_extract_seconds_single_quote_ok() { fields: None, precision: None }, - array: false, format: None, }), } diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 0ed0b4118..7c582546f 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -421,7 +421,6 @@ fn data_type_timestamp_ntz() { "created_at".into() )))), data_type: DataType::TimestampNtz(None), - array: false, format: None } ); diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 86c765813..7fa086828 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -388,7 +388,6 @@ fn test_duckdb_specific_int_types() { Value::Number("123".parse().unwrap(), false).with_empty_span() )), data_type: data_type.clone(), - array: false, format: None, }, expr_from_projection(&select.projection[0]) diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index f513d3670..520e02567 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -886,7 +886,6 @@ fn test_functional_key_part() { )), }), data_type: DataType::Unsigned, - array: false, format: None, })), ); @@ -903,8 +902,10 @@ fn test_functional_key_part() { Value::SingleQuotedString("$.fields".to_string()).with_empty_span() )), }), - data_type: DataType::Unsigned, - array: true, + data_type: DataType::Array(ArrayElemTypeDef::Keyword( + Box::new(DataType::Unsigned), + None, + )), format: None, })), ); @@ -4278,12 +4279,47 @@ fn parse_cast_integers() { #[test] fn parse_cast_array() { - mysql().verified_expr("CAST(foo AS SIGNED ARRAY)"); + // The element type may be any type accepted by CAST(). 
+ for ty in [ + "SIGNED", + "UNSIGNED", + "CHAR", + "CHAR(10)", + "BINARY", + "BINARY(5)", + "DATE", + "TIME", + "DATETIME", + "DECIMAL", + "DECIMAL(10,2)", + "DOUBLE", + "FLOAT", + "YEAR", + ] { + mysql().verified_expr(&format!("CAST(foo AS {ty} ARRAY)")); + } + + // `ARRAY` on its own is not a valid CAST target type. mysql() .run_parser_method("CAST(foo AS ARRAY)", |p| p.parse_expr()) .expect_err("ARRAY alone is not a type"); } +#[test] +fn parse_multi_valued_index() { + // `CAST(... AS ARRAY)` key part in CREATE TABLE, CREATE INDEX, and + // ALTER TABLE. See https://dev.mysql.com/doc/refman/8.0/en/create-index.html + mysql_and_generic().verified_stmt( + "CREATE TABLE customers (id BIGINT, custinfo JSON, INDEX zips ((CAST(custinfo -> '$.zipcode' AS UNSIGNED ARRAY))))", + ); + mysql_and_generic().verified_stmt( + "CREATE INDEX zips ON customers((CAST(custinfo -> '$.zipcode' AS UNSIGNED ARRAY)))", + ); + mysql_and_generic().verified_stmt( + "ALTER TABLE customers ADD INDEX zips ((CAST(custinfo -> '$.zipcode' AS UNSIGNED ARRAY)))", + ); +} + #[test] fn parse_match_against_with_alias() { let sql = "SELECT tbl.ProjectID FROM surveys.tbl1 AS tbl WHERE MATCH (tbl.ReferenceID) AGAINST ('AAA' IN BOOLEAN MODE)"; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 274988be0..58ad71aab 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2016,7 +2016,6 @@ fn parse_execute() { (Value::Number("1337".parse().unwrap(), false)).with_empty_span() )), data_type: DataType::SmallInt(None), - array: false, format: None }, alias: None @@ -2028,7 +2027,6 @@ fn parse_execute() { (Value::Number("7331".parse().unwrap(), false)).with_empty_span() )), data_type: DataType::SmallInt(None), - array: false, format: None }, alias: None @@ -2655,7 +2653,6 @@ fn parse_array_index_expr() { ))), None )), - array: false, format: None, }))), access_chain: vec![ @@ -2681,6 +2678,25 @@ fn parse_array_index_expr() { ); } +#[test] +fn 
parse_array_type_def_with_keyword() { + // SQL-standard `ARRAY` keyword with an optional size, in a column + // definition and as a CAST target. See: + // https://www.postgresql.org/docs/current/arrays.html#ARRAYS-DECLARATION + pg().verified_stmt("CREATE TABLE sal_emp (pay_by_quarter INTEGER ARRAY)"); + pg().verified_stmt("CREATE TABLE sal_emp (pay_by_quarter INTEGER ARRAY[4])"); + pg().verified_stmt("CREATE TABLE genome (codons CHAR(3) ARRAY[1000])"); + pg().verified_stmt("CREATE TABLE t (a VARCHAR(10) ARRAY[2])"); + pg().verified_stmt("CREATE TABLE genome (codons CHAR(3) ARRAY[1000] NOT NULL)"); + pg().verified_stmt( + "CREATE TEMPORARY TABLE arrtest2 (i INTEGER ARRAY[4], f FLOAT8[], n NUMERIC[], t TEXT[], d TIMESTAMP[])", + ); + pg().verified_stmt("CREATE TABLE p (e MONEY ARRAY, f MONEY ARRAY[7])"); + pg().verified_only_select("SELECT CAST(ARRAY[1, 2, 3] AS INTEGER ARRAY)"); + pg().verified_only_select("SELECT CAST(ARRAY[1, 2, 3] AS INTEGER ARRAY[3])"); + pg().verified_only_select("SELECT foo::INTEGER ARRAY[3]"); +} + #[test] fn parse_array_subscript() { let tests = [ @@ -6040,7 +6056,6 @@ fn parse_at_time_zone() { Value::SingleQuotedString("America/Los_Angeles".to_owned()).with_empty_span(), )), data_type: DataType::Text, - array: false, format: None, }), }), @@ -6901,7 +6916,6 @@ fn arrow_cast_precedence() { (Value::SingleQuotedString("bar".to_string())).with_empty_span() )), data_type: DataType::Text, - array: false, format: None, }), } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 14eb5d9f1..ae44312e4 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1269,7 +1269,6 @@ fn parse_array() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("a"))), data_type: DataType::Array(ArrayElemTypeDef::None), - array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -1379,7 +1378,6 @@ fn parse_semi_structured_data_traversal() { } }), data_type: 
DataType::Array(ArrayElemTypeDef::None), - array: false, format: None, }), path: JsonPath {