Skip to content

Commit

Permalink
doc-gen: migrate scalar functions (string) documentation 4/4 (#13927)
Browse files Browse the repository at this point in the history
* doc-gen: migrate scalar functions (string) documentation 4/4

* fix: fix typo and update function docs

---------

Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
  • Loading branch information
Chen-Yuan-Lai and Cheng-Yuan-Lai authored Dec 29, 2024
1 parent 383f279 commit ab69bb0
Show file tree
Hide file tree
Showing 9 changed files with 186 additions and 228 deletions.
48 changes: 19 additions & 29 deletions datafusion/functions/src/unicode/left.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::any::Any;
use std::cmp::Ordering;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{
Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
Expand All @@ -31,12 +31,28 @@ use datafusion_common::cast::{
};
use datafusion_common::exec_err;
use datafusion_common::Result;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns a specified number of characters from the left side of a string.",
syntax_example = "left(str, n)",
sql_example = r#"```sql
> select left('datafusion', 4);
+-----------------------------------+
| left(Utf8("datafusion"),Int64(4)) |
+-----------------------------------+
| data |
+-----------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "n", description = "Number of characters to return."),
related_udf(name = "right")
)]
#[derive(Debug)]
pub struct LeftFunc {
signature: Signature,
Expand Down Expand Up @@ -99,36 +115,10 @@ impl ScalarUDFImpl for LeftFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_left_doc())
self.doc()
}
}

/// Process-wide cache for the `left` documentation; filled on first access.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the user-facing documentation for the `left` function.
///
/// The value is assembled on the first call and stored in [`DOCUMENTATION`];
/// every later call hands back the same reference.
fn get_left_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        let description =
            "Returns a specified number of characters from the left side of a string.";
        let syntax = "left(str, n)";
        let sql_example = r#"```sql
> select left('datafusion', 4);
+-----------------------------------+
| left(Utf8("datafusion"),Int64(4)) |
+-----------------------------------+
| data |
+-----------------------------------+
```"#;

        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("str", Some("String"))
            .with_argument("n", "Number of characters to return.")
            .with_related_udf("right")
            .build()
    })
}

/// Returns the first n characters of the string, or, when n is negative, all but the last |n| characters.
/// left('abcde', 2) = 'ab'
/// The implementation uses UTF-8 code points as characters
Expand Down
50 changes: 23 additions & 27 deletions datafusion/functions/src/unicode/lpad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::any::Any;
use std::fmt::Write;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{
Array, ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
Expand All @@ -31,12 +31,32 @@ use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Pads the left side of a string with another string to a specified string length.",
syntax_example = "lpad(str, n[, padding_str])",
sql_example = r#"```sql
> select lpad('Dolly', 10, 'hello');
+---------------------------------------------+
| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) |
+---------------------------------------------+
| helloDolly |
+---------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "n", description = "String length to pad to."),
argument(
name = "padding_str",
description = "Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._"
),
related_udf(name = "rpad")
)]
#[derive(Debug)]
pub struct LPadFunc {
signature: Signature,
Expand Down Expand Up @@ -103,34 +123,10 @@ impl ScalarUDFImpl for LPadFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_lpad_doc())
self.doc()
}
}

/// Process-wide cache for the `lpad` documentation; filled on first access.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the user-facing documentation for the `lpad` function.
///
/// The value is assembled on the first call and stored in [`DOCUMENTATION`];
/// every later call hands back the same reference.
fn get_lpad_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        let description =
            "Pads the left side of a string with another string to a specified string length.";
        let syntax = "lpad(str, n[, padding_str])";
        let sql_example = r#"```sql
> select lpad('Dolly', 10, 'hello');
+---------------------------------------------+
| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) |
+---------------------------------------------+
| helloDolly |
+---------------------------------------------+
```"#;
        let padding_str_description = "Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._";

        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("str", Some("String"))
            .with_argument("n", "String length to pad to.")
            .with_argument("padding_str", padding_str_description)
            .with_related_udf("rpad")
            .build()
    })
}

/// Extends the string to length 'length' by prepending the characters fill (a space by default).
/// If the string is already longer than length then it is truncated (on the right).
/// lpad('hi', 5, 'xy') = 'xyxhi'
Expand Down
48 changes: 19 additions & 29 deletions datafusion/functions/src/unicode/right.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::any::Any;
use std::cmp::{max, Ordering};
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{
Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
Expand All @@ -31,12 +31,28 @@ use datafusion_common::cast::{
};
use datafusion_common::exec_err;
use datafusion_common::Result;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns a specified number of characters from the right side of a string.",
syntax_example = "right(str, n)",
sql_example = r#"```sql
> select right('datafusion', 6);
+------------------------------------+
| right(Utf8("datafusion"),Int64(6)) |
+------------------------------------+
| fusion |
+------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "n", description = "Number of characters to return."),
related_udf(name = "left")
)]
#[derive(Debug)]
pub struct RightFunc {
signature: Signature,
Expand Down Expand Up @@ -99,36 +115,10 @@ impl ScalarUDFImpl for RightFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_right_doc())
self.doc()
}
}

/// Process-wide cache for the `right` documentation; filled on first access.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the user-facing documentation for the `right` function.
///
/// The value is assembled on the first call and stored in [`DOCUMENTATION`];
/// every later call hands back the same reference.
fn get_right_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        Documentation::builder(
            DOC_SECTION_STRING,
            "Returns a specified number of characters from the right side of a string.",
            "right(str, n)",
        )
        .with_sql_example(
            r#"```sql
> select right('datafusion', 6);
+------------------------------------+
| right(Utf8("datafusion"),Int64(6)) |
+------------------------------------+
| fusion |
+------------------------------------+
```"#,
        )
        .with_standard_argument("str", Some("String"))
        // Ends with a period so the rendered argument text matches the
        // wording used for the same argument of `left`.
        .with_argument("n", "Number of characters to return.")
        .with_related_udf("left")
        .build()
    })
}

/// Returns the last n characters of the string, or, when n is negative, all but the first |n| characters.
/// right('abcde', 2) = 'de'
/// The implementation uses UTF-8 code points as characters
Expand Down
54 changes: 23 additions & 31 deletions datafusion/functions/src/unicode/rpad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,37 @@ use arrow::datatypes::DataType;
use datafusion_common::cast::as_int64_array;
use datafusion_common::DataFusionError;
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;
use std::any::Any;
use std::fmt::Write;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;
use unicode_segmentation::UnicodeSegmentation;
use DataType::{LargeUtf8, Utf8, Utf8View};

#[user_doc(
doc_section(label = "String Functions"),
description = "Pads the right side of a string with another string to a specified string length.",
syntax_example = "rpad(str, n[, padding_str])",
sql_example = r#"```sql
> select rpad('datafusion', 20, '_-');
+-----------------------------------------------+
| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) |
+-----------------------------------------------+
| datafusion_-_-_-_-_- |
+-----------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "n", description = "String length to pad to."),
argument(
name = "padding_str",
description = "String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._"
),
related_udf(name = "lpad")
)]
#[derive(Debug)]
pub struct RPadFunc {
signature: Signature,
Expand Down Expand Up @@ -122,38 +142,10 @@ impl ScalarUDFImpl for RPadFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_rpad_doc())
self.doc()
}
}

/// Process-wide cache for the `rpad` documentation; filled on first access.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the user-facing documentation for the `rpad` function.
///
/// The value is assembled on the first call and stored in [`DOCUMENTATION`];
/// every later call hands back the same reference.
fn get_rpad_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        let description =
            "Pads the right side of a string with another string to a specified string length.";
        let syntax = "rpad(str, n[, padding_str])";
        let sql_example = r#"```sql
> select rpad('datafusion', 20, '_-');
+-----------------------------------------------+
| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) |
+-----------------------------------------------+
| datafusion_-_-_-_-_- |
+-----------------------------------------------+
```"#;
        let padding_str_description = "String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._";

        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("str", Some("String"))
            .with_argument("n", "String length to pad to.")
            .with_argument("padding_str", padding_str_description)
            .with_related_udf("lpad")
            .build()
    })
}

pub fn rpad<StringArrayLen: OffsetSizeTrait, FillArrayLen: OffsetSizeTrait>(
args: &[ArrayRef],
) -> Result<ArrayRef> {
Expand Down
45 changes: 19 additions & 26 deletions datafusion/functions/src/unicode/strpos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,34 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_int_type};
use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray};
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the starting position of a specified substring in a string. Positions begin at 1. If the substring does not exist in the string, the function returns 0.",
syntax_example = "strpos(str, substr)",
alternative_syntax = "position(substr in origstr)",
sql_example = r#"```sql
> select strpos('datafusion', 'fus');
+----------------------------------------+
| strpos(Utf8("datafusion"),Utf8("fus")) |
+----------------------------------------+
| 5 |
+----------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "substr", description = "Substring expression to search for.")
)]
#[derive(Debug)]
pub struct StrposFunc {
signature: Signature,
Expand Down Expand Up @@ -79,33 +95,10 @@ impl ScalarUDFImpl for StrposFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_strpos_doc())
self.doc()
}
}

/// Process-wide cache for the `strpos` documentation; filled on first access.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the user-facing documentation for the `strpos` function.
///
/// The value is assembled on the first call and stored in [`DOCUMENTATION`];
/// every later call hands back the same reference.
fn get_strpos_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        let description = "Returns the starting position of a specified substring in a string. Positions begin at 1. If the substring does not exist in the string, the function returns 0.";
        let syntax = "strpos(str, substr)";
        let sql_example = r#"```sql
> select strpos('datafusion', 'fus');
+----------------------------------------+
| strpos(Utf8("datafusion"),Utf8("fus")) |
+----------------------------------------+
| 5 |
+----------------------------------------+
```"#;

        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("str", Some("String"))
            .with_argument("substr", "Substring expression to search for.")
            .with_alternative_syntax("position(substr in origstr)")
            .build()
    })
}

fn strpos(args: &[ArrayRef]) -> Result<ArrayRef> {
match (args[0].data_type(), args[1].data_type()) {
(DataType::Utf8, DataType::Utf8) => {
Expand Down
Loading

0 comments on commit ab69bb0

Please sign in to comment.