Skip to content

Commit

Permalink
doc-gen: migrate scalar functions (string) documentation 1/4
Browse files Browse the repository at this point in the history
  • Loading branch information
Cheng-Yuan-Lai committed Dec 30, 2024
1 parent a08dc0a commit bd8033e
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 211 deletions.
58 changes: 24 additions & 34 deletions datafusion/functions/src/string/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,33 @@ use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, AsArray, Int32Array};
use arrow::datatypes::DataType;
use arrow::error::ArrowError;
use datafusion_common::{internal_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the Unicode character code of the first character in a string.",
syntax_example = "ascii(str)",
sql_example = r#"```sql
> select ascii('abc');
+--------------------+
| ascii(Utf8("abc")) |
+--------------------+
| 97 |
+--------------------+
> select ascii('🚀');
+-------------------+
| ascii(Utf8("🚀")) |
+-------------------+
| 128640 |
+-------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
related_udf(name = "chr")
)]
#[derive(Debug)]
pub struct AsciiFunc {
signature: Signature,
Expand Down Expand Up @@ -73,41 +94,10 @@ impl ScalarUDFImpl for AsciiFunc {
}

/// Returns the documentation entry for the `ascii` function, if any.
// NOTE(review): this rendered diff shows two tail expressions here; presumably
// `Some(get_ascii_doc())` is the removed line and `self.doc()` is its
// replacement, so only one of them should remain in the compiled method.
fn documentation(&self) -> Option<&Documentation> {
Some(get_ascii_doc())
self.doc()
}
}

/// Lazily initialised storage for the `ascii` documentation.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared documentation for `ascii`, building it on the first call.
fn get_ascii_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        let description =
            "Returns the Unicode character code of the first character in a string.";
        let syntax = "ascii(str)";
        // The example text is a raw string, so its lines must stay unindented.
        let sql_example = r#"```sql
> select ascii('abc');
+--------------------+
| ascii(Utf8("abc")) |
+--------------------+
| 97 |
+--------------------+
> select ascii('🚀');
+-------------------+
| ascii(Utf8("🚀")) |
+-------------------+
| 128640 |
+-------------------+
```"#;

        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("str", Some("String"))
            .with_related_udf("chr")
            .build()
    })
}

fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
where
V: ArrayAccessor<Item = &'a str>,
Expand Down
47 changes: 18 additions & 29 deletions datafusion/functions/src/string/bit_length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,29 @@
use arrow::compute::kernels::length::bit_length;
use arrow::datatypes::DataType;
use std::any::Any;
use std::sync::OnceLock;

use crate::utils::utf8_to_int_type;
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the bit length of a string.",
syntax_example = "bit_length(str)",
sql_example = r#"```sql
> select bit_length('datafusion');
+--------------------------------+
| bit_length(Utf8("datafusion")) |
+--------------------------------+
| 80 |
+--------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
related_udf(name = "length"),
related_udf(name = "octet_length")
)]
#[derive(Debug)]
pub struct BitLengthFunc {
signature: Signature,
Expand Down Expand Up @@ -92,32 +107,6 @@ impl ScalarUDFImpl for BitLengthFunc {
}

/// Returns the documentation entry for the `bit_length` function, if any.
// NOTE(review): this rendered diff shows two tail expressions here; presumably
// `Some(get_bit_length_doc())` is the removed line and `self.doc()` is its
// replacement, so only one of them should remain in the compiled method.
fn documentation(&self) -> Option<&Documentation> {
Some(get_bit_length_doc())
self.doc()
}
}

/// Lazily initialised storage for the `bit_length` documentation.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared documentation for `bit_length`, building it on the first call.
fn get_bit_length_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        let description = "Returns the bit length of a string.";
        let syntax = "bit_length(str)";
        // The example text is a raw string, so its lines must stay unindented.
        let sql_example = r#"```sql
> select bit_length('datafusion');
+--------------------------------+
| bit_length(Utf8("datafusion")) |
+--------------------------------+
| 80 |
+--------------------------------+
```"#;

        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("str", Some("String"))
            .with_related_udf("length")
            .with_related_udf("octet_length")
            .build()
    })
}
46 changes: 18 additions & 28 deletions datafusion/functions/src/string/chr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::ArrayRef;
use arrow::array::StringArray;
Expand All @@ -27,9 +27,9 @@ use arrow::datatypes::DataType::Utf8;
use crate::utils::make_scalar_function;
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;

/// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character.
/// chr(65) = 'A'
Expand Down Expand Up @@ -60,6 +60,21 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(Arc::new(result) as ArrayRef)
}

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the character with the specified ASCII or Unicode code value.",
syntax_example = "chr(expression)",
sql_example = r#"```sql
> select chr(128640);
+--------------------+
| chr(Int64(128640)) |
+--------------------+
| 🚀 |
+--------------------+
```"#,
standard_argument(name = "expression", prefix = "String"),
related_udf(name = "ascii")
)]
#[derive(Debug)]
pub struct ChrFunc {
signature: Signature,
Expand Down Expand Up @@ -105,31 +120,6 @@ impl ScalarUDFImpl for ChrFunc {
}

/// Returns the documentation entry for the `chr` function, if any.
// NOTE(review): this rendered diff shows two tail expressions here; presumably
// `Some(get_chr_doc())` is the removed line and `self.doc()` is its
// replacement, so only one of them should remain in the compiled method.
fn documentation(&self) -> Option<&Documentation> {
Some(get_chr_doc())
self.doc()
}
}

/// Lazily initialised storage for the `chr` documentation.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared documentation for `chr`, building it on the first call.
fn get_chr_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        let description =
            "Returns the character with the specified ASCII or Unicode code value.";
        let syntax = "chr(expression)";
        // The example text is a raw string, so its lines must stay unindented.
        let sql_example = r#"```sql
> select chr(128640);
+--------------------+
| chr(Int64(128640)) |
+--------------------+
| 🚀 |
+--------------------+
```"#;

        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("expression", Some("String"))
            .with_related_udf("ascii")
            .build()
    })
}
46 changes: 18 additions & 28 deletions datafusion/functions/src/string/contains.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,28 @@ use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8, Utf8View};
use datafusion_common::exec_err;
use datafusion_common::DataFusionError;
use datafusion_common::Result;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Return true if search_str is found within string (case-sensitive).",
syntax_example = "contains(str, search_str)",
sql_example = r#"```sql
> select contains('the quick brown fox', 'row');
+---------------------------------------------------+
| contains(Utf8("the quick brown fox"),Utf8("row")) |
+---------------------------------------------------+
| true |
+---------------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "search_str", description = "The string to search for in str.")
)]
#[derive(Debug)]
pub struct ContainsFunc {
signature: Signature,
Expand Down Expand Up @@ -75,35 +90,10 @@ impl ScalarUDFImpl for ContainsFunc {
}

/// Returns the documentation entry for the `contains` function, if any.
// NOTE(review): this rendered diff shows two tail expressions here; presumably
// `Some(get_contains_doc())` is the removed line and `self.doc()` is its
// replacement, so only one of them should remain in the compiled method.
fn documentation(&self) -> Option<&Documentation> {
Some(get_contains_doc())
self.doc()
}
}

/// Lazily initialised storage for the `contains` documentation.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared documentation for `contains`, building it on the first call.
fn get_contains_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        let description =
            "Return true if search_str is found within string (case-sensitive).";
        let syntax = "contains(str, search_str)";
        // The example text is a raw string, so its lines must stay unindented.
        let sql_example = r#"```sql
> select contains('the quick brown fox', 'row');
+---------------------------------------------------+
| contains(Utf8("the quick brown fox"),Utf8("row")) |
+---------------------------------------------------+
| true |
+---------------------------------------------------+
```"#;

        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("str", Some("String"))
            .with_argument("search_str", "The string to search for in str.")
            .build()
    })
}

/// use `arrow::compute::contains` to do the calculation for contains
pub fn contains(args: &[ArrayRef]) -> Result<ArrayRef, DataFusionError> {
match (args[0].data_type(), args[1].data_type()) {
Expand Down
47 changes: 18 additions & 29 deletions datafusion/functions/src/string/octet_length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,29 @@
use arrow::compute::kernels::length::length;
use arrow::datatypes::DataType;
use std::any::Any;
use std::sync::OnceLock;

use crate::utils::utf8_to_int_type;
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the length of a string in bytes.",
syntax_example = "octet_length(str)",
sql_example = r#"```sql
> select octet_length('Ångström');
+--------------------------------+
| octet_length(Utf8("Ångström")) |
+--------------------------------+
| 10 |
+--------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
related_udf(name = "bit_length"),
related_udf(name = "length")
)]
#[derive(Debug)]
pub struct OctetLengthFunc {
signature: Signature,
Expand Down Expand Up @@ -92,36 +107,10 @@ impl ScalarUDFImpl for OctetLengthFunc {
}

/// Returns the documentation entry for the `octet_length` function, if any.
// NOTE(review): this rendered diff shows two tail expressions here; presumably
// `Some(get_octet_length_doc())` is the removed line and `self.doc()` is its
// replacement, so only one of them should remain in the compiled method.
fn documentation(&self) -> Option<&Documentation> {
Some(get_octet_length_doc())
self.doc()
}
}

/// Lazily initialised storage for the `octet_length` documentation.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared documentation for `octet_length`, building it on the first call.
fn get_octet_length_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        let description = "Returns the length of a string in bytes.";
        let syntax = "octet_length(str)";
        // The example text is a raw string, so its lines must stay unindented.
        let sql_example = r#"```sql
> select octet_length('Ångström');
+--------------------------------+
| octet_length(Utf8("Ångström")) |
+--------------------------------+
| 10 |
+--------------------------------+
```"#;

        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("str", Some("String"))
            .with_related_udf("bit_length")
            .with_related_udf("length")
            .build()
    })
}

#[cfg(test)]
mod tests {
use std::sync::Arc;
Expand Down
Loading

0 comments on commit bd8033e

Please sign in to comment.