From fb5378d3ba23855c06c7baac653b91b4974f6d3c Mon Sep 17 00:00:00 2001 From: Ian Lai <108986288+Chen-Yuan-Lai@users.noreply.github.com> Date: Sun, 29 Dec 2024 21:09:04 +0800 Subject: [PATCH] doc-gen: migrate scalar functions (crypto) documentation (#13918) * doc-gen: migrate scalar functions (crypto) documentation * doc-gen: fix typo and update function docs --------- Co-authored-by: Cheng-Yuan-Lai --- datafusion/functions/src/crypto/digest.rs | 68 ++++++++----------- datafusion/functions/src/crypto/md5.rs | 43 +++++------- datafusion/functions/src/crypto/sha224.rs | 43 +++++------- datafusion/functions/src/crypto/sha256.rs | 43 +++++------- datafusion/functions/src/crypto/sha384.rs | 43 +++++------- datafusion/functions/src/crypto/sha512.rs | 43 +++++------- .../source/user-guide/sql/scalar_functions.md | 18 ++--- 7 files changed, 117 insertions(+), 184 deletions(-) diff --git a/datafusion/functions/src/crypto/digest.rs b/datafusion/functions/src/crypto/digest.rs index 0d2d130cdd71..cc52f32614fd 100644 --- a/datafusion/functions/src/crypto/digest.rs +++ b/datafusion/functions/src/crypto/digest.rs @@ -19,13 +19,38 @@ use super::basic::{digest, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature::*, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the binary hash of an expression using the specified algorithm.", + syntax_example = "digest(expression, algorithm)", + sql_example = r#"```sql +> select digest('foo', 'sha256'); ++------------------------------------------+ +| digest(Utf8("foo"), Utf8("sha256")) | ++------------------------------------------+ +| | ++------------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String"), + argument( + name = "algorithm", + description = "String expression specifying algorithm to use. Must be one of: + - md5 + - sha224 + - sha256 + - sha384 + - sha512 + - blake2s + - blake2b + - blake3" + ) +)] #[derive(Debug)] pub struct DigestFunc { signature: Signature, @@ -78,43 +103,6 @@ impl ScalarUDFImpl for DigestFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_digest_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_digest_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_HASHING, - "Computes the binary hash of an expression using the specified algorithm.", - "digest(expression, algorithm)", - ) - .with_sql_example( - r#"```sql -> select digest('foo', 'sha256'); -+------------------------------------------+ -| digest(Utf8("foo"), Utf8("sha256")) | -+------------------------------------------+ -| | -+------------------------------------------+ -```"#, - ) - .with_standard_argument("expression", Some("String")) - .with_argument( - "algorithm", - "String expression specifying algorithm to use. Must be one of: - -- md5 -- sha224 -- sha256 -- sha384 -- sha512 -- blake2s -- blake2b -- blake3", - ) - .build() - }) -} diff --git a/datafusion/functions/src/crypto/md5.rs b/datafusion/functions/src/crypto/md5.rs index e6cc59a4a4f7..636ca65735c9 100644 --- a/datafusion/functions/src/crypto/md5.rs +++ b/datafusion/functions/src/crypto/md5.rs @@ -19,13 +19,26 @@ use crate::crypto::basic::md5; use arrow::datatypes::DataType; use datafusion_common::{plan_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes an MD5 128-bit checksum for a string expression.", + syntax_example = "md5(expression)", + sql_example = r#"```sql +> select md5('foo'); ++-------------------------------------+ +| md5(Utf8("foo")) | ++-------------------------------------+ +| | ++-------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] #[derive(Debug)] pub struct Md5Func { signature: Signature, @@ -94,30 +107,6 @@ impl ScalarUDFImpl for Md5Func { } fn documentation(&self) -> Option<&Documentation> { - Some(get_md5_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_md5_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_HASHING, - "Computes an MD5 128-bit checksum for a string expression.", - "md5(expression)", - ) - .with_sql_example( - r#"```sql -> select md5('foo'); -+-------------------------------------+ -| md5(Utf8("foo")) | -+-------------------------------------+ -| | -+-------------------------------------+ -```"#, - ) - .with_standard_argument("expression", Some("String")) - .build() - }) -} diff --git a/datafusion/functions/src/crypto/sha224.rs b/datafusion/functions/src/crypto/sha224.rs index eba22bb3de37..341b3495f9c6 100644 --- a/datafusion/functions/src/crypto/sha224.rs +++ b/datafusion/functions/src/crypto/sha224.rs @@ -19,13 +19,26 @@ use super::basic::{sha224, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the SHA-224 hash of a binary string.", + syntax_example = "sha224(expression)", + sql_example = r#"```sql +> select sha224('foo'); ++------------------------------------------+ +| sha224(Utf8("foo")) | ++------------------------------------------+ +| | ++------------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] #[derive(Debug)] pub struct SHA224Func { signature: Signature, @@ -50,30 +63,6 @@ impl SHA224Func { } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_sha224_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_HASHING, - "Computes the SHA-224 hash of a binary string.", - "sha224(expression)", - ) - .with_sql_example( - r#"```sql -> select sha224('foo'); -+------------------------------------------+ -| sha224(Utf8("foo")) | -+------------------------------------------+ -| | -+------------------------------------------+ -```"#, - ) - .with_standard_argument("expression", Some("String")) - .build() - }) -} - impl ScalarUDFImpl for SHA224Func { fn as_any(&self) -> &dyn Any { self @@ -100,6 +89,6 @@ impl ScalarUDFImpl for SHA224Func { } fn documentation(&self) -> Option<&Documentation> { - Some(get_sha224_doc()) + self.doc() } } diff --git a/datafusion/functions/src/crypto/sha256.rs b/datafusion/functions/src/crypto/sha256.rs index 9343fa0af942..f40dd99c59fe 100644 --- a/datafusion/functions/src/crypto/sha256.rs +++ b/datafusion/functions/src/crypto/sha256.rs @@ -19,13 +19,26 @@ use super::basic::{sha256, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the SHA-256 hash of a binary string.", + syntax_example = "sha256(expression)", + sql_example = r#"```sql +> select sha256('foo'); ++--------------------------------------+ +| sha256(Utf8("foo")) | ++--------------------------------------+ +| | ++--------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] #[derive(Debug)] pub struct SHA256Func { signature: Signature, @@ -74,30 +87,6 @@ impl ScalarUDFImpl for SHA256Func { } fn documentation(&self) -> Option<&Documentation> { - Some(get_sha256_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_sha256_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_HASHING, - "Computes the SHA-256 hash of a binary string.", - "sha256(expression)", - ) - .with_sql_example( - r#"```sql -> select sha256('foo'); -+--------------------------------------+ -| sha256(Utf8("foo")) | -+--------------------------------------+ -| | -+--------------------------------------+ -```"#, - ) - .with_standard_argument("expression", Some("String")) - .build() - }) -} diff --git a/datafusion/functions/src/crypto/sha384.rs b/datafusion/functions/src/crypto/sha384.rs index 495036d02474..e38a755826f8 100644 --- a/datafusion/functions/src/crypto/sha384.rs +++ b/datafusion/functions/src/crypto/sha384.rs @@ -19,13 +19,26 @@ use super::basic::{sha384, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the SHA-384 hash of a binary string.", + syntax_example = "sha384(expression)", + sql_example = r#"```sql +> select sha384('foo'); ++-----------------------------------------+ +| sha384(Utf8("foo")) | ++-----------------------------------------+ +| | ++-----------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] #[derive(Debug)] pub struct SHA384Func { signature: Signature, @@ -74,30 +87,6 @@ impl ScalarUDFImpl for SHA384Func { } fn documentation(&self) -> Option<&Documentation> { - Some(get_sha384_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_sha384_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_HASHING, - "Computes the SHA-384 hash of a binary string.", - "sha384(expression)", - ) - .with_sql_example( - r#"```sql -> select sha384('foo'); -+-----------------------------------------+ -| sha384(Utf8("foo")) | -+-----------------------------------------+ -| | -+-----------------------------------------+ -```"#, - ) - .with_standard_argument("expression", Some("String")) - .build() - }) -} diff --git a/datafusion/functions/src/crypto/sha512.rs b/datafusion/functions/src/crypto/sha512.rs index 7d454ff8da35..7fe2a26ebbce 100644 --- a/datafusion/functions/src/crypto/sha512.rs +++ b/datafusion/functions/src/crypto/sha512.rs @@ -19,13 +19,26 @@ use super::basic::{sha512, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the SHA-512 hash of a binary string.", + syntax_example = "sha512(expression)", + sql_example = r#"```sql +> select sha512('foo'); ++-------------------------------------------+ +| sha512(Utf8("foo")) | ++-------------------------------------------+ +| | ++-------------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] #[derive(Debug)] pub struct SHA512Func { signature: Signature, @@ -74,30 +87,6 @@ impl ScalarUDFImpl for SHA512Func { } fn documentation(&self) -> Option<&Documentation> { - Some(get_sha512_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_sha512_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_HASHING, - "Computes the SHA-512 hash of a binary string.", - "sha512(expression)", - ) - .with_sql_example( - r#"```sql -> select sha512('foo'); -+-------------------------------------------+ -| sha512(Utf8("foo")) | -+-------------------------------------------+ -| | -+-------------------------------------------+ -```"#, - ) - .with_argument("expression", "String") - .build() - }) -} diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index be4f5e56b3af..081509165edf 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -4195,14 +4195,14 @@ digest(expression, algorithm) - **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - **algorithm**: String expression specifying algorithm to use. Must be one of: -- md5 -- sha224 -- sha256 -- sha384 -- sha512 -- blake2s -- blake2b -- blake3 + - md5 + - sha224 + - sha256 + - sha384 + - sha512 + - blake2s + - blake2b + - blake3 #### Example @@ -4317,7 +4317,7 @@ sha512(expression) #### Arguments -- **expression**: String +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. #### Example