Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

doc-gen: migrate scalar functions (string) documentation 4/4 #13927

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 19 additions & 29 deletions datafusion/functions/src/unicode/left.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::any::Any;
use std::cmp::Ordering;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{
Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
Expand All @@ -31,12 +31,28 @@ use datafusion_common::cast::{
};
use datafusion_common::exec_err;
use datafusion_common::Result;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns a specified number of characters from the left side of a string.",
syntax_example = "left(str, n)",
sql_example = r#"```sql
> select left('datafusion', 4);
+-----------------------------------+
| left(Utf8("datafusion"),Int64(4)) |
+-----------------------------------+
| data |
+-----------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "n", description = "Number of characters to return."),
related_udf(name = "right")
)]
#[derive(Debug)]
pub struct LeftFunc {
signature: Signature,
Expand Down Expand Up @@ -99,36 +115,10 @@ impl ScalarUDFImpl for LeftFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_left_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_left_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Returns a specified number of characters from the left side of a string.",
"left(str, n)",
)
.with_sql_example(
r#"```sql
> select left('datafusion', 4);
+-----------------------------------+
| left(Utf8("datafusion"),Int64(4)) |
+-----------------------------------+
| data |
+-----------------------------------+
```"#,
)
.with_standard_argument("str", Some("String"))
.with_argument("n", "Number of characters to return.")
.with_related_udf("right")
.build()
})
}

/// Returns the first n characters of the string, or, when n is negative, all but the last |n| characters.
/// left('abcde', 2) = 'ab'
/// The implementation uses UTF-8 code points as characters
Expand Down
50 changes: 23 additions & 27 deletions datafusion/functions/src/unicode/lpad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::any::Any;
use std::fmt::Write;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{
Array, ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
Expand All @@ -31,12 +31,32 @@ use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Pads the left side of a string with another string to a specified string length.",
syntax_example = "lpad(str, n[, padding_str])",
sql_example = r#"```sql
> select lpad('Dolly', 10, 'hello');
+---------------------------------------------+
| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) |
+---------------------------------------------+
| helloDolly |
+---------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "n", description = "String length to pad to."),
argument(
name = "padding_str",
description = "Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._"
),
related_udf(name = "rpad")
)]
#[derive(Debug)]
pub struct LPadFunc {
signature: Signature,
Expand Down Expand Up @@ -103,34 +123,10 @@ impl ScalarUDFImpl for LPadFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_lpad_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_lpad_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Pads the left side of a string with another string to a specified string length.",
"lpad(str, n[, padding_str])")
.with_sql_example(r#"```sql
> select lpad('Dolly', 10, 'hello');
+---------------------------------------------+
| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) |
+---------------------------------------------+
| helloDolly |
+---------------------------------------------+
```"#)
.with_standard_argument("str", Some("String"))
.with_argument("n", "String length to pad to.")
.with_argument("padding_str", "Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._")
.with_related_udf("rpad")
.build()
})
}

/// Extends the string to length `length` by prepending the characters of `fill` (a space by default).
/// If the string is already longer than `length`, it is truncated (on the right).
/// lpad('hi', 5, 'xy') = 'xyxhi'
Expand Down
48 changes: 19 additions & 29 deletions datafusion/functions/src/unicode/right.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::any::Any;
use std::cmp::{max, Ordering};
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{
Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
Expand All @@ -31,12 +31,28 @@ use datafusion_common::cast::{
};
use datafusion_common::exec_err;
use datafusion_common::Result;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns a specified number of characters from the right side of a string.",
syntax_example = "right(str, n)",
sql_example = r#"```sql
> select right('datafusion', 6);
+------------------------------------+
| right(Utf8("datafusion"),Int64(6)) |
+------------------------------------+
| fusion |
+------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "n", description = "Number of characters to return."),
related_udf(name = "left")
)]
#[derive(Debug)]
pub struct RightFunc {
signature: Signature,
Expand Down Expand Up @@ -99,36 +115,10 @@ impl ScalarUDFImpl for RightFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_right_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_right_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Returns a specified number of characters from the right side of a string.",
"right(str, n)",
)
.with_sql_example(
r#"```sql
> select right('datafusion', 6);
+------------------------------------+
| right(Utf8("datafusion"),Int64(6)) |
+------------------------------------+
| fusion |
+------------------------------------+
```"#,
)
.with_standard_argument("str", Some("String"))
.with_argument("n", "Number of characters to return")
.with_related_udf("left")
.build()
})
}

/// Returns the last n characters of the string, or, when n is negative, all but the first |n| characters.
/// right('abcde', 2) = 'de'
/// The implementation uses UTF-8 code points as characters
Expand Down
54 changes: 23 additions & 31 deletions datafusion/functions/src/unicode/rpad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,37 @@ use arrow::datatypes::DataType;
use datafusion_common::cast::as_int64_array;
use datafusion_common::DataFusionError;
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;
use std::any::Any;
use std::fmt::Write;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;
use unicode_segmentation::UnicodeSegmentation;
use DataType::{LargeUtf8, Utf8, Utf8View};

#[user_doc(
doc_section(label = "String Functions"),
description = "Pads the right side of a string with another string to a specified string length.",
syntax_example = "rpad(str, n[, padding_str])",
sql_example = r#"```sql
> select rpad('datafusion', 20, '_-');
+-----------------------------------------------+
| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) |
+-----------------------------------------------+
| datafusion_-_-_-_-_- |
+-----------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "n", description = "String length to pad to."),
argument(
name = "padding_str",
description = "String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._"
),
related_udf(name = "lpad")
)]
#[derive(Debug)]
pub struct RPadFunc {
signature: Signature,
Expand Down Expand Up @@ -122,38 +142,10 @@ impl ScalarUDFImpl for RPadFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_rpad_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_rpad_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Pads the right side of a string with another string to a specified string length.",
"rpad(str, n[, padding_str])")
.with_sql_example(r#"```sql
> select rpad('datafusion', 20, '_-');
+-----------------------------------------------+
| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) |
+-----------------------------------------------+
| datafusion_-_-_-_-_- |
+-----------------------------------------------+
```"#)
.with_standard_argument(
"str",
Some("String"),
)
.with_argument("n", "String length to pad to.")
.with_argument("padding_str",
"String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._")
.with_related_udf("lpad")
.build()
})
}

pub fn rpad<StringArrayLen: OffsetSizeTrait, FillArrayLen: OffsetSizeTrait>(
args: &[ArrayRef],
) -> Result<ArrayRef> {
Expand Down
45 changes: 19 additions & 26 deletions datafusion/functions/src/unicode/strpos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,34 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_int_type};
use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray};
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the starting position of a specified substring in a string. Positions begin at 1. If the substring does not exist in the string, the function returns 0.",
syntax_example = "strpos(str, substr)",
alternative_syntax = "position(substr in origstr)",
sql_example = r#"```sql
> select strpos('datafusion', 'fus');
+----------------------------------------+
| strpos(Utf8("datafusion"),Utf8("fus")) |
+----------------------------------------+
| 5 |
+----------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "substr", description = "Substring expression to search for.")
)]
#[derive(Debug)]
pub struct StrposFunc {
signature: Signature,
Expand Down Expand Up @@ -79,33 +95,10 @@ impl ScalarUDFImpl for StrposFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_strpos_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_strpos_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Returns the starting position of a specified substring in a string. Positions begin at 1. If the substring does not exist in the string, the function returns 0.",
"strpos(str, substr)")
.with_sql_example(r#"```sql
> select strpos('datafusion', 'fus');
+----------------------------------------+
| strpos(Utf8("datafusion"),Utf8("fus")) |
+----------------------------------------+
| 5 |
+----------------------------------------+
```"#)
.with_standard_argument("str", Some("String"))
.with_argument("substr", "Substring expression to search for.")
.with_alternative_syntax("position(substr in origstr)")
.build()
})
}

fn strpos(args: &[ArrayRef]) -> Result<ArrayRef> {
match (args[0].data_type(), args[1].data_type()) {
(DataType::Utf8, DataType::Utf8) => {
Expand Down
Loading
Loading