diff --git a/docs/data-operate/import/load-data-format.md b/docs/data-operate/import/load-data-format.md index 86657322539c6..03cc891896845 100644 --- a/docs/data-operate/import/load-data-format.md +++ b/docs/data-operate/import/load-data-format.md @@ -33,8 +33,8 @@ The following import methods support data import in CSV format: - [Broker Load](./import-way/broker-load-manual.md) - [Routine Load](./import-way/routine-load-manual.md) - [MySQL Load](./import-way/mysql-load-manual.md) -- [INSERT INTO FROM S3 TVF](../../sql-manual/sql-functions/table-functions/s3) -- [INSERT INTO FROM HDFS TVF](../../sql-manual/sql-functions/table-functions/hdfs) +- [INSERT INTO FROM S3 TVF](../../sql-manual/sql-functions/table-valued-functions/s3) +- [INSERT INTO FROM HDFS TVF](../../sql-manual/sql-functions/table-valued-functions/hdfs) ### Supported CSV Formats - csv: File without header and type @@ -136,8 +136,8 @@ Currently, only the following import methods support data import in JSON format: - [Stream Load](./import-way/stream-load-manual.md) - [Broker Load](./import-way/broker-load-manual.md) - [Routine Load](./import-way/routine-load-manual.md) -- [INSERT INTO FROM S3 TVF](../../sql-manual/sql-functions/table-functions/s3) -- [INSERT INTO FROM HDFS TVF](../../sql-manual/sql-functions/table-functions/hdfs) +- [INSERT INTO FROM S3 TVF](../../sql-manual/sql-functions/table-valued-functions/s3) +- [INSERT INTO FROM HDFS TVF](../../sql-manual/sql-functions/table-valued-functions/hdfs) ### Supported JSON Formats @@ -725,8 +725,8 @@ For Kafka data sources, the content in each Massage is treated as a complete JSO The following import methods support importing data in CSV format: - [Stream Load](./import-way/stream-load-manual.md) - [Broker Load](./import-way/broker-load-manual.md) -- [INSERT INTO FROM S3 TVF](../../sql-manual/sql-functions/table-functions/s3) -- [INSERT INTO FROM HDFS TVF](../../sql-manual/sql-functions/table-functions/hdfs) +- [INSERT INTO FROM S3 TVF](../../sql-manual/sql-functions/table-valued-functions/s3) +- [INSERT INTO FROM HDFS TVF](../../sql-manual/sql-functions/table-valued-functions/hdfs) ### Import Examples @@ -762,8 +762,8 @@ WITH S3 The following import methods support importing data in CSV format: - [Stream Load](./import-way/stream-load-manual.md) - [Broker Load](./import-way/broker-load-manual.md) -- [INSERT INTO FROM S3 TVF](../../sql-manual/sql-functions/table-functions/s3) -- [INSERT INTO FROM HDFS TVF](../../sql-manual/sql-functions/table-functions/hdfs) +- [INSERT INTO FROM S3 TVF](../../sql-manual/sql-functions/table-valued-functions/s3) +- [INSERT INTO FROM HDFS TVF](../../sql-manual/sql-functions/table-valued-functions/hdfs) ### Import Examples @@ -792,4 +792,4 @@ WITH S3 "AWS_SECRET_KEY"="AWS_SECRET_KEY", "AWS_REGION" = "AWS_REGION" ); -``` \ No newline at end of file +``` diff --git a/docs/lakehouse/datalake-analytics/iceberg.md b/docs/lakehouse/datalake-analytics/iceberg.md index 97712ff3395d7..4d2f2f311a1e1 100644 --- a/docs/lakehouse/datalake-analytics/iceberg.md +++ b/docs/lakehouse/datalake-analytics/iceberg.md @@ -268,4 +268,4 @@ You can use the `FOR TIME AS OF` and `FOR VERSION AS OF` statements to read hist `SELECT * FROM iceberg_tbl FOR VERSION AS OF 868895038966572;` -In addition, you can use the [iceberg_meta](../../sql-manual/sql-functions/table-functions/iceberg-meta.md) table function to query the snapshot information of the specified table. 
+In addition, you can use the [iceberg_meta](../../sql-manual/sql-functions/table-valued-functions/iceberg-meta.md) table function to query the snapshot information of the specified table. diff --git a/docs/lakehouse/file.md b/docs/lakehouse/file.md index 710750f7b0bca..5694086386af1 100644 --- a/docs/lakehouse/file.md +++ b/docs/lakehouse/file.md @@ -30,9 +30,11 @@ With the Table Value Function feature, Doris is able to query files in object st For more usage details, please see the documentation: -* [S3](https://doris.apache.org/docs/dev/sql-manual/sql-functions/table-functions/s3/): supports file analysis on object storage compatible with S3 +* [S3](../sql-manual/sql-functions/table-valued-functions/s3.md): supports file analysis on object storage compatible with S3 -* [HDFS](https://doris.apache.org/docs/dev/sql-manual/sql-functions/table-functions/hdfs/): supports file analysis on HDFS +* [HDFS](../sql-manual/sql-functions/table-valued-functions/hdfs.md): supports file analysis on HDFS + +* [LOCAL](../sql-manual/sql-functions/table-valued-functions/local.md): supports file analysis on the local file system The followings illustrate how file analysis is conducted with the example of S3 Table Value Function. diff --git a/docs/query/view-materialized-view/async-materialized-view.md b/docs/query/view-materialized-view/async-materialized-view.md index f1cae80e82000..1031fb05f9a56 100644 --- a/docs/query/view-materialized-view/async-materialized-view.md +++ b/docs/query/view-materialized-view/async-materialized-view.md @@ -111,7 +111,7 @@ Specific syntax can be viewed [CREATE ASYNC MATERIALIZED VIEW](../../sql-manual/ select * from mv_infos("database"="tpch") where Name="mv1"; ``` -The unique features of materialized views can be viewed through [mv_infos()](../../sql-manual/sql-functions/table-functions/mv_infos.md) +The unique features of materialized views can be viewed through [mv_infos()](../../sql-manual/sql-functions/table-valued-functions/mv_infos.md) Properties related to table, still viewed through [SHOW TABLES](../../sql-manual/sql-statements/Show-Statements/SHOW-TABLES.md) @@ -142,7 +142,7 @@ Task is used to describe specific refresh information, such as the time used for select * from jobs("type"="mv") order by CreateTime; ``` -Specific syntax can be viewed [jobs("type"="mv")](../../sql-manual/sql-functions/table-functions/jobs.md) +Specific syntax can be viewed [jobs("type"="mv")](../../sql-manual/sql-functions/table-valued-functions/jobs.md) #### Pause materialized view job scheduled scheduling @@ -170,7 +170,7 @@ Specific syntax can be viewed [RESUME MATERIALIZED VIEW JOB](../../sql-manual/sq select * from tasks("type"="mv"); ``` -Specific syntax can be viewed [tasks("type"="mv")](../../sql-manual/sql-functions/table-functions/tasks.md) +Specific syntax can be viewed [tasks("type"="mv")](../../sql-manual/sql-functions/table-valued-functions/tasks.md) #### Cancel the task of objectifying the view @@ -536,7 +536,7 @@ The commonly used commands for `olapTable` are also applicable to materialized v The unique commands for materialized views mainly include the following: #### View materialized view metadata -[mv_infos()](../../sql-manual/sql-functions/table-functions/mv_infos) +[mv_infos()](../../sql-manual/sql-functions/table-valued-functions/mv_infos) Focus on the following fields: - State: If the state changes to SCHEMA_CHANGE, it means the schema of the base table has changed.
In this case, the materialized view cannot be used for transparent rewriting (but direct querying of the materialized view is not affected). If the next refresh task is successful, the state will be restored to NORMAL. @@ -544,7 +544,7 @@ Focus on the following fields: - RefreshState: The status of the last refresh task of the materialized view. If it is FAIL, it means the execution failed, and further localization can be done through tasks(). - SyncWithBaseTables: Whether the materialized view is synchronized with the base table data. If not synchronized, further determination can be made by using show partitions to identify which partition is not synchronized. #### View tasks for the materialized view -[tasks("type"="mv")](../../sql-manual/sql-functions/table-functions/tasks.md) +[tasks("type"="mv")](../../sql-manual/sql-functions/table-valued-functions/tasks.md) Focus on the following fields: - Status: If it is FAILED, it means the task execution failed. You can check the reason for failure through ErrorMsg. You can also search Doris logs using LastQueryId to get more detailed error information. diff --git a/docs/sql-manual/sql-functions/table-functions/backends.md b/docs/sql-manual/sql-functions/table-valued-functions/backends.md similarity index 100% rename from docs/sql-manual/sql-functions/table-functions/backends.md rename to docs/sql-manual/sql-functions/table-valued-functions/backends.md diff --git a/docs/sql-manual/sql-functions/table-functions/catalogs.md b/docs/sql-manual/sql-functions/table-valued-functions/catalogs.md similarity index 100% rename from docs/sql-manual/sql-functions/table-functions/catalogs.md rename to docs/sql-manual/sql-functions/table-valued-functions/catalogs.md diff --git a/docs/sql-manual/sql-functions/table-functions/frontends.md b/docs/sql-manual/sql-functions/table-valued-functions/frontends.md similarity index 100% rename from docs/sql-manual/sql-functions/table-functions/frontends.md rename to docs/sql-manual/sql-functions/table-valued-functions/frontends.md diff --git a/docs/sql-manual/sql-functions/table-functions/frontends_disks.md b/docs/sql-manual/sql-functions/table-valued-functions/frontends_disks.md similarity index 98% rename from docs/sql-manual/sql-functions/table-functions/frontends_disks.md rename to docs/sql-manual/sql-functions/table-valued-functions/frontends_disks.md index f75268f31f4cb..0532fc477ebac 100644 --- a/docs/sql-manual/sql-functions/table-functions/frontends_disks.md +++ b/docs/sql-manual/sql-functions/table-valued-functions/frontends_disks.md @@ -1,6 +1,6 @@ --- { - "title": "frontends_disks", + "title": "FRONTENDS_DISKS", "language": "en" } --- @@ -84,4 +84,4 @@ mysql> select * from frontends_disk()\G ### keywords - frontends_disks \ No newline at end of file + frontends_disks diff --git a/docs/sql-manual/sql-functions/table-functions/hdfs.md b/docs/sql-manual/sql-functions/table-valued-functions/hdfs.md similarity index 96% rename from docs/sql-manual/sql-functions/table-functions/hdfs.md rename to docs/sql-manual/sql-functions/table-valued-functions/hdfs.md index 46198b0451d53..7748a302ab48b 100644 --- a/docs/sql-manual/sql-functions/table-functions/hdfs.md +++ b/docs/sql-manual/sql-functions/table-valued-functions/hdfs.md @@ -92,7 +92,11 @@ File format parameters: other kinds of parameters: - `path_partition_keys`: (optional) Specifies the column names carried in the file path. For example, if the file path is /path/to/city=beijing/date="2023-07-09", you should fill in `path_partition_keys="city,date"`. 
It will automatically read the corresponding column names and values from the path during load process. -- `resource`:(optional)Specify the resource name. Hdfs Tvf can use the existing Hdfs resource to directly access Hdfs. You can refer to the method for creating an Hdfs resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4 . +- `resource`:(optional)Specify the resource name. Hdfs Tvf can use the existing Hdfs resource to directly access Hdfs. You can refer to the method for creating an Hdfs resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4. + +:::tip Tip +To directly query a TVF or create a VIEW based on that TVF, you need the USAGE privilege on that resource. To query a VIEW created based on a TVF, you only need the SELECT privilege on that VIEW. +::: ### Examples diff --git a/docs/sql-manual/sql-functions/table-functions/iceberg-meta.md b/docs/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md similarity index 100% rename from docs/sql-manual/sql-functions/table-functions/iceberg-meta.md rename to docs/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md diff --git a/docs/sql-manual/sql-functions/table-functions/jobs.md b/docs/sql-manual/sql-functions/table-valued-functions/jobs.md similarity index 99% rename from docs/sql-manual/sql-functions/table-functions/jobs.md rename to docs/sql-manual/sql-functions/table-valued-functions/jobs.md index 3a072259be21d..3bc7276e08e3e 100644 --- a/docs/sql-manual/sql-functions/table-functions/jobs.md +++ b/docs/sql-manual/sql-functions/table-valued-functions/jobs.md @@ -27,10 +27,8 @@ under the License. ## `jobs` ### Name -:::tip + jobs -- since 2.1 -::: ### description @@ -38,6 +36,8 @@ Table function, generating a temporary task table, which can view job informatio This function is used in the from clause. +This function is supported since 2.1.0. + #### syntax `jobs("type"="")` diff --git a/docs/sql-manual/sql-functions/table-functions/local.md b/docs/sql-manual/sql-functions/table-valued-functions/local.md similarity index 99% rename from docs/sql-manual/sql-functions/table-functions/local.md rename to docs/sql-manual/sql-functions/table-valued-functions/local.md index a9b5d333b97a5..4f39a8dae76ae 100644 --- a/docs/sql-manual/sql-functions/table-functions/local.md +++ b/docs/sql-manual/sql-functions/table-valued-functions/local.md @@ -1,6 +1,6 @@ --- { - "title": "local", + "title": "LOCAL", "language": "en" } --- @@ -24,7 +24,7 @@ specific language governing permissions and limitations under the License. --> -## Local +## local ### Name diff --git a/docs/sql-manual/sql-functions/table-functions/mv_infos.md b/docs/sql-manual/sql-functions/table-valued-functions/mv_infos.md similarity index 98% rename from docs/sql-manual/sql-functions/table-functions/mv_infos.md rename to docs/sql-manual/sql-functions/table-valued-functions/mv_infos.md index e9c9c24c9a7d5..e3938ace5e680 100644 --- a/docs/sql-manual/sql-functions/table-functions/mv_infos.md +++ b/docs/sql-manual/sql-functions/table-valued-functions/mv_infos.md @@ -36,6 +36,8 @@ Table function, generating temporary tables for asynchronous materialized views, This function is used in the from clause. +This function is supported since 2.1.0.
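+ +For example, a minimal illustrative query (the `tpch` database is borrowed from the asynchronous materialized view documentation; substitute your own database name): + +```sql +select * from mv_infos("database"="tpch"); +```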
+ #### syntax `mv_infos("database"="")` diff --git a/docs/sql-manual/sql-functions/table-valued-functions/partitions.md b/docs/sql-manual/sql-functions/table-valued-functions/partitions.md new file mode 100644 index 0000000000000..7bda80d77e298 --- /dev/null +++ b/docs/sql-manual/sql-functions/table-valued-functions/partitions.md @@ -0,0 +1,130 @@ +--- +{ + "title": "PARTITIONS", + "language": "en" +} +--- + + + +## `partitions` + +### Name + +partitions + +### Description + +This table function generates a temporary partitions table, which allows you to view the partition list of a given TABLE. + +This function is used in the from clause. + +This function is supported since 2.1.5. + +#### Syntax + +`partitions("catalog"="","database"="","table"="")` + +`partitions()` table structure: +```sql +mysql> desc function partitions("catalog"="internal","database"="zd","table"="user"); ++--------------------------+---------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++--------------------------+---------+------+-------+---------+-------+ +| PartitionId | BIGINT | No | false | NULL | NONE | +| PartitionName | TEXT | No | false | NULL | NONE | +| VisibleVersion | BIGINT | No | false | NULL | NONE | +| VisibleVersionTime | TEXT | No | false | NULL | NONE | +| State | TEXT | No | false | NULL | NONE | +| PartitionKey | TEXT | No | false | NULL | NONE | +| Range | TEXT | No | false | NULL | NONE | +| DistributionKey | TEXT | No | false | NULL | NONE | +| Buckets | INT | No | false | NULL | NONE | +| ReplicationNum | INT | No | false | NULL | NONE | +| StorageMedium | TEXT | No | false | NULL | NONE | +| CooldownTime | TEXT | No | false | NULL | NONE | +| RemoteStoragePolicy | TEXT | No | false | NULL | NONE | +| LastConsistencyCheckTime | TEXT | No | false | NULL | NONE | +| DataSize | TEXT | No | false | NULL | NONE | +| IsInMemory | BOOLEAN | No | false | NULL | NONE | +| ReplicaAllocation | TEXT | No | false | NULL | NONE | +| IsMutable | BOOLEAN | No | false | NULL | NONE | +| SyncWithBaseTables | BOOLEAN | No | false | NULL | NONE | +| UnsyncTables | TEXT | No | false | NULL | NONE | ++--------------------------+---------+------+-------+---------+-------+ +20 rows in set (0.02 sec) +``` + +* PartitionId: partition ID +* PartitionName: partition name +* VisibleVersion: visible version +* VisibleVersionTime: visible version time +* State: partition state +* PartitionKey: partition key +* Range: partition range +* DistributionKey: distribution key +* Buckets: number of buckets +* ReplicationNum: number of replicas +* StorageMedium: storage medium +* CooldownTime: cooldown time +* RemoteStoragePolicy: remote storage policy +* LastConsistencyCheckTime: last consistency check time +* DataSize: data size +* IsInMemory: whether the partition is in memory +* ReplicaAllocation: replica allocation +* IsMutable: whether the partition is mutable +* SyncWithBaseTables: whether the partition is synchronized with the base table data (for partitions of asynchronous materialized views) +* UnsyncTables: the base tables whose data is not synchronized (for partitions of asynchronous materialized views) + +```sql +mysql> desc function partitions("catalog"="hive","database"="zdtest","table"="com2"); ++-----------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-----------+------+------+-------+---------+-------+ +| Partition | TEXT | No | false | NULL | NONE | ++-----------+------+------+-------+---------+-------+ +1 row in set (0.11 sec) +``` + +* Partition: partition name + +### Example + +1.
View the partition list of table1 under db1 in the internal catalog + +```sql +mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1"); +``` + +2. View the information of the partition named partition1 under table1 + +```sql +mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1"; +``` + +3. View the ID of the partition named partition1 under table1 + +```sql +mysql> select PartitionId from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1"; +``` + +### Keywords + + partitions diff --git a/docs/sql-manual/sql-functions/table-functions/query.md b/docs/sql-manual/sql-functions/table-valued-functions/query.md similarity index 100% rename from docs/sql-manual/sql-functions/table-functions/query.md rename to docs/sql-manual/sql-functions/table-valued-functions/query.md diff --git a/docs/sql-manual/sql-functions/table-functions/s3.md b/docs/sql-manual/sql-functions/table-valued-functions/s3.md similarity index 99% rename from docs/sql-manual/sql-functions/table-functions/s3.md rename to docs/sql-manual/sql-functions/table-valued-functions/s3.md index 05f62a13fd164..57a15bc13e55d 100644 --- a/docs/sql-manual/sql-functions/table-functions/s3.md +++ b/docs/sql-manual/sql-functions/table-valued-functions/s3.md @@ -102,7 +102,11 @@ The following 2 parameters are used for loading in csv format other parameter: - `path_partition_keys`: (optional) Specifies the column names carried in the file path. For example, if the file path is /path/to/city=beijing/date="2023-07-09", you should fill in `path_partition_keys="city,date"`. It will automatically read the corresponding column names and values from the path during load process. -- `resource`:(optional)Specify the resource name. S3 tvf can use the existing S3 resource to directly access S3. You can refer to the method for creating an S3 resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4. +- `resource`:(optional)Specify the resource name. S3 tvf can use the existing S3 resource to directly access S3. You can refer to the method for creating an S3 resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4. + +:::tip Tip +To directly query a TVF or create a VIEW based on that TVF, you need the USAGE privilege on that resource. To query a VIEW created based on a TVF, you only need the SELECT privilege on that VIEW. +::: ### Example diff --git a/docs/sql-manual/sql-functions/table-functions/tasks.md b/docs/sql-manual/sql-functions/table-valued-functions/tasks.md similarity index 98% rename from docs/sql-manual/sql-functions/table-functions/tasks.md rename to docs/sql-manual/sql-functions/table-valued-functions/tasks.md index 3055070fea8f1..ebd279effe65d 100644 --- a/docs/sql-manual/sql-functions/table-functions/tasks.md +++ b/docs/sql-manual/sql-functions/table-valued-functions/tasks.md @@ -39,6 +39,8 @@ Table function, generates a temporary table of tasks, which allows you to view t This function is used in the FROM clause. +This function is supported since 2.1.0.
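+ +For example, a minimal illustrative query that lists the refresh tasks of materialized views ("mv" is one of the supported type values described in the syntax below): + +```sql +select * from tasks("type"="mv"); +```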
+ #### syntax `tasks("type"="insert");` @@ -169,4 +171,4 @@ mysql> select * from tasks("type"="mv") where JobName="inner_mtmv_75043"; ### keywords - tasks, job, insert, mv, materilized view \ No newline at end of file + tasks, job, insert, mv, materialized view diff --git a/docs/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-JOB.md b/docs/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-JOB.md index d584384369058..441dffc9a3ae1 100644 --- a/docs/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-JOB.md +++ b/docs/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-JOB.md @@ -65,8 +65,8 @@ Currently, only users with the ADMIN role can perform this operation. #### Related Documentation -[PAUSE-JOB](../Alter/PAUSE-JOB.md),[RESUME-JOB](../Alter/RESUME-JOB.md),[DROP-JOB](../Drop/DROP-JOB.md), [QUERY-JOB](../../../sql-functions/table-functions/jobs.md), -[TVF-TASKS](../../../sql-functions/table-functions/tasks.md) +[PAUSE-JOB](../Alter/PAUSE-JOB.md),[RESUME-JOB](../Alter/RESUME-JOB.md),[DROP-JOB](../Drop/DROP-JOB.md), [QUERY-JOB](../../../sql-functions/table-valued-functions/jobs.md), +[TVF-TASKS](../../../sql-functions/table-valued-functions/tasks.md) ### Grammar @@ -167,4 +167,4 @@ CREATE JOB my_job ON SCHEDULE EVERY 1 DAY STARTS '2020-01-01 00:00:00' ENDS '202 ### Keywords - CREATE, JOB, SCHEDULE \ No newline at end of file + CREATE, JOB, SCHEDULE diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current.json b/i18n/zh-CN/docusaurus-plugin-content-docs/current.json index f6fbdd1bb24cd..e0c3935306dfd 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current.json +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current.json @@ -295,6 +295,10 @@ "message": "表函数", "description": "The label for category Table Functions in sidebar docs" }, + "sidebar.docs.category.Table Valued Functions": { + "message": "表值函数", + "description": "The label for category Table Valued Functions in sidebar docs" + }, "sidebar.docs.category.Analytic(Window) Functions": { "message": "分析(窗口)函数", "description": "The label for category Analytic(Window) Functions in sidebar docs" }, @@ -427,4 +431,4 @@ "message": "使用教程", "description": "The label for category BI and Database IDE in sidebar docs" } -} \ No newline at end of file +} diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/backends.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/backends.md similarity index 100% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/backends.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/backends.md diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/catalogs.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/catalogs.md similarity index 100% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/catalogs.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/catalogs.md diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/frontends.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/frontends.md similarity index 100% rename from
i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/frontends.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/frontends.md diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/frontends_disks.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/frontends_disks.md similarity index 100% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/frontends_disks.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/frontends_disks.md diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/hdfs.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/hdfs.md similarity index 96% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/hdfs.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/hdfs.md index ea8e6343ad99f..d71923b184e1c 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/hdfs.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/hdfs.md @@ -84,6 +84,10 @@ hdfs( 其他参数: - `path_partition_keys`:(选填)指定文件路径中携带的分区列名,例如/path/to/city=beijing/date="2023-07-09", 则填写`path_partition_keys="city,date"`,将会自动从路径中读取相应列名和列值进行导入。 - `resource`:(选填)指定 Resource 名,HDFS TVF 可以利用已有的 HFDS Resource 来直接访问 HDFS。创建 HDFS Resource 的方法可以参照 [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md)。该功能自 2.1.4 版本开始支持。 + +:::tip 注意 +直接查询 TVF 或基于该 TVF 创建 View ,需要拥有该 Resource 的 USAGE 权限,查询基于 TVF 创建的 View ,只需要该 View 的 SELECT 权限。 +::: ### Examples diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/iceberg-meta.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md similarity index 100% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/iceberg-meta.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/jobs.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/jobs.md similarity index 99% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/jobs.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/jobs.md index 79b8ff2ad3ea5..19885eae4d3d8 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/jobs.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/jobs.md @@ -28,10 +28,7 @@ under the License.
### Name -:::tip jobs -- since 2.1 -::: ### description @@ -39,6 +36,8 @@ jobs 该函数用于 from 子句中。 +该函数自 2.1.0 版本支持。 + #### syntax `jobs("type"="")` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/local.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/local.md similarity index 99% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/local.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/local.md index 411a688a5eade..7a3d945fe69cb 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/local.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/local.md @@ -1,6 +1,6 @@ --- { - "title": "local", + "title": "LOCAL", "language": "zh-CN" } --- diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/mv_infos.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/mv_infos.md similarity index 98% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/mv_infos.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/mv_infos.md index ee9064ded0d3a..67c7b58ffcb9d 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/mv_infos.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/mv_infos.md @@ -36,6 +36,8 @@ mv_infos 该函数用于 from 子句中。 +该函数自 2.1.0 版本支持。 + #### syntax `mv_infos("database"="")` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/partitions.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/partitions.md new file mode 100644 index 0000000000000..ce25fc0240cd3 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/partitions.md @@ -0,0 +1,130 @@ +--- +{ + "title": "PARTITIONS", + "language": "zh-CN" +} +--- + + + +## `partitions` + +### Name + +partitions + +### Description + +表函数,生成分区临时表,可以查看某个 TABLE 的分区列表。 + +该函数用于 From 子句中。 + +该函数自 2.1.5 版本开始支持。 + +#### Syntax + +`partitions("catalog"="","database"="","table"="")` + +partitions()表结构: +```sql +mysql> desc function partitions("catalog"="internal","database"="zd","table"="user"); ++--------------------------+---------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++--------------------------+---------+------+-------+---------+-------+ +| PartitionId | BIGINT | No | false | NULL | NONE | +| PartitionName | TEXT | No | false | NULL | NONE | +| VisibleVersion | BIGINT | No | false | NULL | NONE | +| VisibleVersionTime | TEXT | No | false | NULL | NONE | +| State | TEXT | No | false | NULL | NONE | +| PartitionKey | TEXT | No | false | NULL | NONE | +| Range | TEXT | No | false | NULL | NONE | +| DistributionKey | TEXT | No | false | NULL | NONE | +| Buckets | INT | No | false | NULL | NONE | +| ReplicationNum | INT | No | false | NULL | NONE | +| StorageMedium | TEXT | No | false | NULL | NONE | +| CooldownTime | TEXT | No | false | NULL | NONE | +| RemoteStoragePolicy | TEXT | No | false | NULL | NONE | +| 
LastConsistencyCheckTime | TEXT | No | false | NULL | NONE | +| DataSize | TEXT | No | false | NULL | NONE | +| IsInMemory | BOOLEAN | No | false | NULL | NONE | +| ReplicaAllocation | TEXT | No | false | NULL | NONE | +| IsMutable | BOOLEAN | No | false | NULL | NONE | +| SyncWithBaseTables | BOOLEAN | No | false | NULL | NONE | +| UnsyncTables | TEXT | No | false | NULL | NONE | ++--------------------------+---------+------+-------+---------+-------+ +20 rows in set (0.02 sec) +``` + +* PartitionId:分区id +* PartitionName:分区名字 +* VisibleVersion:分区版本 +* VisibleVersionTime:分区版本提交时间 +* State:分区状态 +* PartitionKey:分区key +* Range:分区范围 +* DistributionKey:分布key +* Buckets:分桶数量 +* ReplicationNum:副本数 +* StorageMedium:存储介质 +* CooldownTime:cooldown时间 +* RemoteStoragePolicy:远程存储策略 +* LastConsistencyCheckTime:上次一致性检查时间 +* DataSize:数据大小 +* IsInMemory:是否存在内存 +* ReplicaAllocation:分布策略 +* IsMutable:是否可变 +* SyncWithBaseTables:是否和基表数据同步(针对异步物化视图的分区) +* UnsyncTables:和哪个基表数据不同步(针对异步物化视图的分区) + +```sql +mysql> desc function partitions("catalog"="hive","database"="zdtest","table"="com2"); ++-----------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-----------+------+------+-------+---------+-------+ +| Partition | TEXT | No | false | NULL | NONE | ++-----------+------+------+-------+---------+-------+ +1 row in set (0.11 sec) +``` + +* Partition:分区名字 + +### Example + +1. 查看 internal CATALOG 下 db1 的 table1 的分区列表 + +```sql +mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1"); +``` + +2. 查看 table1 下的分区名称为 partition1 的分区信息 + +```sql +mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1"; +``` + +3. 查看 table1 下的分区名称为 partition1 的分区 id + +```sql +mysql> select PartitionId from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1"; +``` + +### Keywords + + partitions diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/query.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/query.md similarity index 100% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/query.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/query.md diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/s3.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/s3.md similarity index 99% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/s3.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/s3.md index b5c718d271b91..8dd80cab100ee 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/s3.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/s3.md @@ -97,6 +97,10 @@ S3 TVF 中的每一个参数都是一个 `"key"="value"` 对。 - `path_partition_keys`:(选填)指定文件路径中携带的分区列名,例如 `/path/to/city=beijing/date="2023-07-09"`, 则填写 `path_partition_keys="city,date"`,将会自动从路径中读取相应列名和列值进行导入。 - `resource`:(选填)指定 Resource 名,S3 TVF 可以利用已有的 S3 Resource 来直接访问 S3。创建 S3 Resource 的方法可以参照 [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md)。该功能自 2.1.4 
版本开始支持。 +:::tip 注意 +直接查询 TVF 或基于该 TVF 创建 View ,需要拥有该 Resource 的 USAGE 权限,查询基于 TVF 创建的 View ,只需要该 View 的 SELECT 权限。 +::: + ### Example 读取并访问 S3 兼容的对象存储上的 CSV 格式文件 diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/tasks.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/tasks.md similarity index 98% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/tasks.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/tasks.md index 75284360de298..5cdeac64e21dc 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/tasks.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-valued-functions/tasks.md @@ -36,6 +36,8 @@ tasks 该函数用于 from 子句中。 +该函数自 2.1.0 版本支持。 + #### syntax `tasks("type"="insert");` @@ -163,4 +165,4 @@ mysql> select * from tasks("type"="mv") where JobName="inner_mtmv_75043"; ### keywords - tasks, job, insert, mv, materilized view \ No newline at end of file + tasks, job, insert, mv, materilized view diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0.json b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0.json index 1ecfb7894019c..dd17024688e5d 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0.json +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0.json @@ -287,6 +287,10 @@ "message": "表函数", "description": "The label for category Table Functions in sidebar docs" }, + "sidebar.docs.category.Table Valued Functions": { + "message": "表值函数", + "description": "The label for category Table Valued Functions in sidebar docs" + }, "sidebar.docs.category.Analytic(Window) Functions": { "message": "分析(窗口)函数", "description": "The label for category Analytic(Window) Functions in sidebar docs" @@ -375,4 +379,4 @@ "message": "使用教程", "description": "The label for category BI and Database IDE in sidebar docs" } -} \ No newline at end of file +} diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/backends.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/backends.md new file mode 100644 index 0000000000000..b87bbfe36aa7f --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/backends.md @@ -0,0 +1,111 @@ +--- +{ + "title": "BACKENDS", + "language": "zh-CN" +} +--- + + + +## `backends` + +### Name + +backends + +### description + +表函数,生成 backends 临时表,可以查看当前 doris 集群中的 BE 节点信息。 + +该函数用于 from 子句中。 + +#### syntax +`backends()` + +backends() 表结构: +``` +mysql> desc function backends(); ++-------------------------+---------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------------+---------+------+-------+---------+-------+ +| BackendId | BIGINT | No | false | NULL | NONE | +| Host | TEXT | No | false | NULL | NONE | +| HeartbeatPort | INT | No | false | NULL | NONE | +| BePort | INT | No | false | NULL | NONE | +| HttpPort | INT | No | false | NULL | NONE | +| BrpcPort | INT | No | false | NULL | NONE | +| LastStartTime | TEXT | No | false | NULL | NONE | +| LastHeartbeat | TEXT | No | false | NULL | NONE | +| Alive | BOOLEAN | No | false | NULL | NONE | +| SystemDecommissioned | BOOLEAN | No | false | NULL | NONE | +| TabletNum | BIGINT | 
No | false | NULL | NONE | +| DataUsedCapacity | BIGINT | No | false | NULL | NONE | +| AvailCapacity | BIGINT | No | false | NULL | NONE | +| TotalCapacity | BIGINT | No | false | NULL | NONE | +| UsedPct | DOUBLE | No | false | NULL | NONE | +| MaxDiskUsedPct | DOUBLE | No | false | NULL | NONE | +| RemoteUsedCapacity | BIGINT | No | false | NULL | NONE | +| Tag | TEXT | No | false | NULL | NONE | +| ErrMsg | TEXT | No | false | NULL | NONE | +| Version | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| HeartbeatFailureCounter | INT | No | false | NULL | NONE | +| NodeRole | TEXT | No | false | NULL | NONE | ++-------------------------+---------+------+-------+---------+-------+ +23 rows in set (0.002 sec) +``` + +`backends()` tvf 展示出来的信息基本与 `show backends` 语句展示出的信息一致,但是 `backends()` tvf 的各个字段类型更加明确,且可以利用 tvf 生成的表去做过滤、join 等操作。 + +对 `backends()` tvf 信息展示进行了鉴权,与 `show backends` 行为保持一致,要求用户具有 ADMIN/OPERATOR 权限。 + +### example +``` +mysql> select * from backends()\G +*************************** 1. row *************************** + BackendId: 10002 + Host: 10.xx.xx.90 + HeartbeatPort: 9053 + BePort: 9063 + HttpPort: 8043 + BrpcPort: 8069 + LastStartTime: 2023-06-15 16:51:02 + LastHeartbeat: 2023-06-15 17:09:58 + Alive: 1 + SystemDecommissioned: 0 + TabletNum: 21 + DataUsedCapacity: 0 + AvailCapacity: 5187141550081 + TotalCapacity: 7750977622016 + UsedPct: 33.077583202570978 + MaxDiskUsedPct: 33.077583202583881 + RemoteUsedCapacity: 0 + Tag: {"location" : "default"} + ErrMsg: + Version: doris-0.0.0-trunk-4b18cde0c7 + Status: {"lastSuccessReportTabletsTime":"2023-06-15 17:09:02","lastStreamLoadTime":-1,"isQueryDisabled":false,"isLoadDisabled":false} +HeartbeatFailureCounter: 0 + NodeRole: mix +1 row in set (0.038 sec) +``` + +### keywords + + backends \ No newline at end of file diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/catalogs.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/catalogs.md new file mode 100644 index 0000000000000..c7c5c964c0433 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/catalogs.md @@ -0,0 +1,92 @@ +--- +{ + "title": "CATALOGS", + "language": "zh-CN" +} +--- + + + +## `catalogs` + +### Name + + +catalogs + + +### description + +表函数,生成 catalogs 临时表,可以查看当前doris中的创建的 catalogs 信息。 + +该函数用于 from 子句中。 + +#### syntax + +`catalogs()` + +catalogs()表结构: +``` +mysql> desc function catalogs(); ++-------------+--------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------+--------+------+-------+---------+-------+ +| CatalogId | BIGINT | No | false | NULL | NONE | +| CatalogName | TEXT | No | false | NULL | NONE | +| CatalogType | TEXT | No | false | NULL | NONE | +| Property | TEXT | No | false | NULL | NONE | +| Value | TEXT | No | false | NULL | NONE | ++-------------+--------+------+-------+---------+-------+ +5 rows in set (0.04 sec) +``` + +`catalogs()` tvf展示的信息是综合了 `show catalogs` 与 `show catalog xxx` 语句的结果。 + +可以利用tvf生成的表去做过滤、join等操作。 + + + +### example + +``` +mysql> select * from catalogs(); ++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+ +| CatalogId | CatalogName | CatalogType | Property | Value | 
++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+ +| 16725 | hive | hms | dfs.client.failover.proxy.provider.HANN | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider | +| 16725 | hive | hms | dfs.ha.namenodes.HANN | nn1,nn2 | +| 16725 | hive | hms | create_time | 2023-07-13 16:24:38.968 | +| 16725 | hive | hms | ipc.client.fallback-to-simple-auth-allowed | true | +| 16725 | hive | hms | dfs.namenode.rpc-address.HANN.nn1 | nn1_host:rpc_port | +| 16725 | hive | hms | hive.metastore.uris | thrift://127.0.0.1:7004 | +| 16725 | hive | hms | dfs.namenode.rpc-address.HANN.nn2 | nn2_host:rpc_port | +| 16725 | hive | hms | type | hms | +| 16725 | hive | hms | dfs.nameservices | HANN | +| 0 | internal | internal | NULL | NULL | +| 16726 | es | es | create_time | 2023-07-13 16:24:44.922 | +| 16726 | es | es | type | es | +| 16726 | es | es | hosts | http://127.0.0.1:9200 | ++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+ +13 rows in set (0.01 sec) +``` + +### keywords + + catalogs diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/frontends.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/frontends.md new file mode 100644 index 0000000000000..463fd80f52828 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/frontends.md @@ -0,0 +1,101 @@ +--- +{ + "title": "FRONTENDS", + "language": "zh-CN" +} +--- + + + +## `frontends` + +### Name + +frontends + +### description + +表函数,生成 frontends 临时表,可以查看当前 doris 集群中的 FE 节点信息。 + +该函数用于 from 子句中。 + +#### syntax +`frontends()` + +frontends() 表结构: +``` +mysql> desc function frontends(); ++-------------------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------+------+------+-------+---------+-------+ +| Name | TEXT | No | false | NULL | NONE | +| Host | TEXT | No | false | NULL | NONE | +| EditLogPort | TEXT | No | false | NULL | NONE | +| HttpPort | TEXT | No | false | NULL | NONE | +| QueryPort | TEXT | No | false | NULL | NONE | +| RpcPort | TEXT | No | false | NULL | NONE | +| ArrowFlightSqlPort| TEXT | No | false | NULL | NONE | +| Role | TEXT | No | false | NULL | NONE | +| IsMaster | TEXT | No | false | NULL | NONE | +| ClusterId | TEXT | No | false | NULL | NONE | +| Join | TEXT | No | false | NULL | NONE | +| Alive | TEXT | No | false | NULL | NONE | +| ReplayedJournalId | TEXT | No | false | NULL | NONE | +| LastHeartbeat | TEXT | No | false | NULL | NONE | +| IsHelper | TEXT | No | false | NULL | NONE | +| ErrMsg | TEXT | No | false | NULL | NONE | +| Version | TEXT | No | false | NULL | NONE | +| CurrentConnected | TEXT | No | false | NULL | NONE | ++-------------------+------+------+-------+---------+-------+ +17 rows in set (0.022 sec) +``` + +`frontends()` tvf 展示出来的信息基本与 `show frontends` 语句展示出的信息一致,但是 `frontends()` tvf 的各个字段类型更加明确,且可以利用 tvf 生成的表去做过滤、join 等操作。 + +对 `frontends()` tvf 信息展示进行了鉴权,与 `show frontends` 行为保持一致,要求用户具有 ADMIN/OPERATOR 权限。 + +### example +``` +mysql> select * from frontends()\G +*************************** 1. 
row *************************** + Name: fe_5fa8bf19_fd6b_45cb_89c5_25a5ebc45582 + IP: 10.xx.xx.14 + EditLogPort: 9013 + HttpPort: 8034 + QueryPort: 9033 + RpcPort: 9023 +ArrowFlightSqlPort: 9040 + Role: FOLLOWER + IsMaster: true + ClusterId: 1258341841 + Join: true + Alive: true +ReplayedJournalId: 186 + LastHeartbeat: 2023-06-15 16:53:12 + IsHelper: true + ErrMsg: + Version: doris-0.0.0-trunk-4b18cde0c7 + CurrentConnected: Yes +1 row in set (0.060 sec) +``` + +### keywords + + frontends \ No newline at end of file diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/frontends_disks.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/frontends_disks.md new file mode 100644 index 0000000000000..835d1eed135e5 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/frontends_disks.md @@ -0,0 +1,86 @@ +--- +{ + "title": "frontends_disks", + "language": "zh-CN" +} +--- + + + +## `frontends_disks` + +### Name + +frontends_disks + +### description + +表函数,生成 frontends_disks 临时表,可以查看当前 doris 集群中的 FE 节点的磁盘信息。 + +该函数用于 from 子句中。 + +#### syntax +`frontends_disks()` + +frontends_disks() 表结构: +``` +mysql> desc function frontends_disks(); ++-------------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------+------+------+-------+---------+-------+ +| Name | TEXT | No | false | NULL | NONE | +| Host | TEXT | No | false | NULL | NONE | +| DirType | TEXT | No | false | NULL | NONE | +| Dir | TEXT | No | false | NULL | NONE | +| Filesystem | TEXT | No | false | NULL | NONE | +| Capacity | TEXT | No | false | NULL | NONE | +| Used | TEXT | No | false | NULL | NONE | +| Available | TEXT | No | false | NULL | NONE | +| UseRate | TEXT | No | false | NULL | NONE | +| MountOn | TEXT | No | false | NULL | NONE | ++-------------+------+------+-------+---------+-------+ +11 rows in set (0.14 sec) +``` + +`frontends_disks()` tvf 展示出来的信息基本与 `show frontends disks` 语句展示出的信息一致,但是 `frontends_disks()` tvf 的各个字段类型更加明确,且可以利用 tvf 生成的表去做过滤、join 等操作。 + +对 `frontends_disks()` tvf 信息展示进行了鉴权,与 `show frontends disks` 行为保持一致,要求用户具有 ADMIN/OPERATOR 权限。 + +### example +``` +mysql> select * from frontends_disk()\G +*************************** 1. row *************************** + Name: fe_fe1d5bd9_d1e5_4ccc_9b03_ca79b95c9941 + Host: 172.XX.XX.1 + DirType: log + Dir: /data/doris/fe-github/log + Filesystem: /dev/sdc5 + Capacity: 366G + Used: 119G + Available: 228G + UseRate: 35% + MountOn: /data +...... +12 row in set (0.03 sec) +``` + +### keywords + + frontends_disks \ No newline at end of file diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/hdfs.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/hdfs.md new file mode 100644 index 0000000000000..d71923b184e1c --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/hdfs.md @@ -0,0 +1,151 @@ +--- +{ + "title": "HDFS", + "language": "zh-CN" +} +--- + + + +## HDFS + +### Description + +HDFS 表函数(table-valued-function,tvf),可以让用户像访问关系表格式数据一样,读取并访问 HDFS 上的文件内容。目前支持`csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`文件格式。 + +#### syntax +```sql +hdfs( + "uri" = "..", + "fs.defaultFS" = "...", + "hadoop.username" = "...", + "format" = "csv", + "keyn" = "valuen" + ... 
+ ); +``` + +**参数说明** + +访问 HDFS 相关参数: +- `uri`:(必填)访问 HDFS 的 uri。如果 uri 路径不存在或文件都是空文件,HDFS TVF 将返回空集合。 +- `fs.defaultFS`:(必填) +- `hadoop.username`:(必填)可以是任意字符串,但不能为空 +- `hadoop.security.authentication`:(选填) +- `hadoop.username`:(选填) +- `hadoop.kerberos.principal`:(选填) +- `hadoop.kerberos.keytab`:(选填) +- `dfs.client.read.shortcircuit`:(选填) +- `dfs.domain.socket.path`:(选填) + +访问 HA 模式 HDFS 相关参数: +- `dfs.nameservices`:(选填) +- `dfs.ha.namenodes.your-nameservices`:(选填) +- `dfs.namenode.rpc-address.your-nameservices.your-namenode`:(选填) +- `dfs.client.failover.proxy.provider.your-nameservices`:(选填) + +文件格式相关参数: +- `format`:(必填) 目前支持 `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc/avro` +- `column_separator`:(选填) 列分割符,默认为`\t`。 +- `line_delimiter`:(选填) 行分割符,默认为`\n`。 +- `compress_type`: (选填) 目前支持 `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`。默认值为 `UNKNOWN`, 将会根据 `uri` 的后缀自动推断类型。 + + 下面 6 个参数是用于 JSON 格式的导入,具体使用方法可以参照:[JSON Load](../../../data-operate/import/import-way/load-json-format.md) + +- `read_json_by_line`: (选填) 默认为 `"true"` +- `strip_outer_array`: (选填) 默认为 `"false"` +- `json_root`: (选填) 默认为空 +- `json_paths`: (选填) 默认为空 +- `num_as_string`: (选填) 默认为 `false` +- `fuzzy_parse`: (选填) 默认为 `false` + + 下面 2 个参数用于 CSV 格式的导入: + +- `trim_double_quotes`:布尔类型,选填,默认值为 `false`,为 `true` 时表示裁剪掉 CSV 文件每个字段最外层的双引号 +- `skip_lines`:整数类型,选填,默认值为 0,含义为跳过 CSV 文件的前几行。当设置 Format 设置为 `csv_with_names` 或 `csv_with_names_and_types` 时,该参数会失效 + +其他参数: +- `path_partition_keys`:(选填)指定文件路径中携带的分区列名,例如/path/to/city=beijing/date="2023-07-09", 则填写`path_partition_keys="city,date"`,将会自动从路径中读取相应列名和列值进行导入。 +- `resource`:(选填)指定 Resource 名,HDFS TVF 可以利用已有的 HDFS Resource 来直接访问 HDFS。创建 HDFS Resource 的方法可以参照 [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md)。该功能自 2.1.4 版本开始支持。 + +:::tip 注意 +直接查询 TVF 或基于该 TVF 创建 View ,需要拥有该 Resource 的 USAGE 权限,查询基于 TVF 创建的 View ,只需要该 View 的 SELECT 权限。 +::: + +### Examples + +读取并访问 HDFS 存储上的 CSV 格式文件 +```sql +MySQL [(none)]> select * from hdfs( + "uri" = "hdfs://127.0.0.1:842/user/doris/csv_format_test/student.csv", + "fs.defaultFS" = "hdfs://127.0.0.1:8424", + "hadoop.username" = "doris", + "format" = "csv"); ++------+---------+------+ +| c1 | c2 | c3 | ++------+---------+------+ +| 1 | alice | 18 | +| 2 | bob | 20 | +| 3 | jack | 24 | +| 4 | jackson | 19 | +| 5 | liming | 18 | ++------+---------+------+ +``` + +读取并访问 HA 模式的 HDFS 存储上的 CSV 格式文件 +```sql +MySQL [(none)]> select * from hdfs( + "uri" = "hdfs://127.0.0.1:842/user/doris/csv_format_test/student.csv", + "fs.defaultFS" = "hdfs://127.0.0.1:8424", + "hadoop.username" = "doris", + "format" = "csv", + "dfs.nameservices" = "my_hdfs", + "dfs.ha.namenodes.my_hdfs" = "nn1,nn2", + "dfs.namenode.rpc-address.my_hdfs.nn1" = "nanmenode01:8020", + "dfs.namenode.rpc-address.my_hdfs.nn2" = "nanmenode02:8020", + "dfs.client.failover.proxy.provider.my_hdfs" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"); ++------+---------+------+ +| c1 | c2 | c3 | ++------+---------+------+ +| 1 | alice | 18 | +| 2 | bob | 20 | +| 3 | jack | 24 | +| 4 | jackson | 19 | +| 5 | liming | 18 | ++------+---------+------+ +``` + +可以配合 `desc function` 使用。 + +```sql +MySQL [(none)]> desc function hdfs( + "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student_with_names.csv", + "fs.defaultFS" = "hdfs://127.0.0.1:8424", + "hadoop.username" = "doris", + "format" = "csv_with_names"); +``` + +### Keywords + + HDFS, table-valued-function, TVF + +### Best Practice + + 关于 HDFS TVF 的更详细使用方法可以参照
[S3](./s3.md) TVF, 唯一不同的是访问存储系统的方式不一样。 diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md new file mode 100644 index 0000000000000..30cdfb54158c8 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md @@ -0,0 +1,97 @@ +--- +{ +"title": "ICEBERG_META", +"language": "zh-CN" +} +--- + + + +## iceberg_meta + +### Name + +iceberg_meta + +### description + +iceberg_meta 表函数(table-valued-function,tvf),可以用于读取 iceberg 表的各类元数据信息,如操作历史、生成的快照、文件元数据等。 + +#### syntax +```sql +iceberg_meta( + "table" = "ctl.db.tbl", + "query_type" = "snapshots" + ... + ); +``` + +**参数说明** + +iceberg_meta 表函数 tvf 中的每一个参数都是一个 `"key"="value"` 对。 +相关参数: +- `table`: (必填) 完整的表名,需要按照目录名。库名。表名的格式,填写需要查看的 iceberg 表名。 +- `query_type`: (必填) 想要查看的元数据类型,目前仅支持 snapshots。 + +### Example + +读取并访问 iceberg 表格式的 snapshots 元数据。 + +```sql +select * from iceberg_meta("table" = "ctl.db.tbl", "query_type" = "snapshots"); + +``` + +可以配合`desc function`使用 + +```sql +desc function iceberg_meta("table" = "ctl.db.tbl", "query_type" = "snapshots"); +``` + +### Keywords + + iceberg_meta, table-valued-function, tvf + +### Best Prac + +查看 iceberg 表的 snapshots + +```sql +select * from iceberg_meta("table" = "iceberg_ctl.test_db.test_tbl", "query_type" = "snapshots"); ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +| committed_at | snapshot_id | parent_id | operation | manifest_list | summary | ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +| 2022-09-20 11:14:29 | 64123452344 | -1 | append | hdfs:/path/to/m1 | {"flink.job-id":"xxm1", ...} | +| 2022-09-21 10:36:35 | 98865735822 | 64123452344 | overwrite | hdfs:/path/to/m2 | {"flink.job-id":"xxm2", ...} | +| 2022-09-21 21:44:11 | 51232845315 | 98865735822 | overwrite | hdfs:/path/to/m3 | {"flink.job-id":"xxm3", ...} | ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +``` + +根据 snapshot_id 字段筛选 + +```sql +select * from iceberg_meta("table" = "iceberg_ctl.test_db.test_tbl", "query_type" = "snapshots") +where snapshot_id = 98865735822; ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +| committed_at | snapshot_id | parent_id | operation | manifest_list | summary | ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +| 2022-09-21 10:36:35 | 98865735822 | 64123452344 | overwrite | hdfs:/path/to/m2 | {"flink.job-id":"xxm2", ...} | ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +``` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/local.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/local.md new file mode 100644 index 0000000000000..7a3d945fe69cb --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/local.md @@ -0,0 +1,192 @@ +--- +{ + "title": "LOCAL", + "language": "zh-CN" +} +--- + + + +## local + 
+### Name + +local + +### Description + +Local表函数(table-valued-function,tvf),可以让用户像访问关系表格式数据一样,读取并访问 be 上的文件内容。目前支持`csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`文件格式。 + +该函数需要 ADMIN 权限。 + +#### syntax + +```sql +local( + "file_path" = "path/to/file.txt", + "backend_id" = "be_id", + "format" = "csv", + "keyn" = "valuen" + ... + ); +``` + +**参数说明** + +- 访问local文件的相关参数: + + - `file_path` + + (必填)待读取文件的路径,该路径是一个相对于 `user_files_secure_path` 目录的相对路径, 其中 `user_files_secure_path` 参数是 [be的一个配置项](../../../admin-manual/config/be-config.md) 。 + + 路径中不能包含 `..`,可以使用 glob 语法进行模糊匹配,如:`logs/*.log` + +- 执行方式相关: + + 在 2.1.1 之前的版本中,Doris 仅支持指定某一个 BE 节点,读取该节点上的本地数据文件。 + + - `backend_id`: + + 文件所在的 be id。 `backend_id` 可以通过 `show backends` 命令得到。 + + 从 2.1.2 版本开始,Doris 增加了新的参数 `shared_storage`。 + + - `shared_storage` + + 默认为 false。如果为 true,表示指定的文件存在于共享存储上(比如 NAS)。共享存储必须兼容 POXIS 文件接口,并且同时挂载在所有 BE 节点上。 + + 当 `shared_storage` 为 true 时,可以不设置 `backend_id`,Doris 可能会利用到所有 BE 节点进行数据访问。如果设置了 `backend_id`,则仍然仅在指定 BE 节点上执行。 + +- 文件格式相关参数: + + - `format`:(必填) 目前支持 `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` + - `column_separator`:(选填) 列分割符, 默认为`,`。 + - `line_delimiter`:(选填) 行分割符,默认为`\n`。 + - `compress_type`: (选填) 目前支持 `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`。 默认值为 `UNKNOWN`, 将会根据 `uri` 的后缀自动推断类型。 + +- 以下参数适用于json格式的导入,具体使用方法可以参照:[Json Load](../../../data-operate/import/import-way/load-json-format.md) + + - `read_json_by_line`: (选填) 默认为 `"true"` + - `strip_outer_array`: (选填) 默认为 `"false"` + - `json_root`: (选填) 默认为空 + - `json_paths`: (选填) 默认为空 + - `num_as_string`: (选填) 默认为 `false` + - `fuzzy_parse`: (选填) 默认为 `false` + +- 以下参数适用于csv格式的导入: + + - `trim_double_quotes`: 布尔类型,选填,默认值为 `false`,为 `true` 时表示裁剪掉 csv 文件每个字段最外层的双引号 + - `skip_lines`: 整数类型,选填,默认值为0,含义为跳过csv文件的前几行。当设置format设置为 `csv_with_names` 或 `csv_with_names_and_types` 时,该参数会失效 + +### Examples + +分析指定 BE 上的日志文件: + +```sql +mysql> select * from local( + "file_path" = "log/be.out", + "backend_id" = "10006", + "format" = "csv") + where c1 like "%start_time%" limit 10; ++--------------------------------------------------------+ +| c1 | ++--------------------------------------------------------+ +| start time: 2023年 08月 07日 星期一 23:20:32 CST | +| start time: 2023年 08月 07日 星期一 23:32:10 CST | +| start time: 2023年 08月 08日 星期二 00:20:50 CST | +| start time: 2023年 08月 08日 星期二 00:29:15 CST | ++--------------------------------------------------------+ +``` + +读取和访问位于路径`${DORIS_HOME}/student.csv`的 csv格式文件: + +```sql +mysql> select * from local( + "file_path" = "student.csv", + "backend_id" = "10003", + "format" = "csv"); ++------+---------+--------+ +| c1 | c2 | c3 | ++------+---------+--------+ +| 1 | alice | 18 | +| 2 | bob | 20 | +| 3 | jack | 24 | +| 4 | jackson | 19 | +| 5 | liming | d18 | ++------+---------+--------+ +``` + +访问 NAS 上的共享数据: + +```sql +mysql> select * from local( + "file_path" = "/mnt/doris/prefix_*.txt", + "format" = "csv", + "column_separator" =",", + "shared_storage" = "true"); ++------+------+------+ +| c1 | c2 | c3 | ++------+------+------+ +| 1 | 2 | 3 | +| 1 | 2 | 3 | +| 1 | 2 | 3 | +| 1 | 2 | 3 | +| 1 | 2 | 3 | ++------+------+------+ +``` + +可以配合`desc function`使用 + +```sql +mysql> desc function local( + "file_path" = "student.csv", + "backend_id" = "10003", + "format" = "csv"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| c1 | TEXT | Yes | false | NULL | NONE | +| c2 | TEXT | Yes | false | NULL | NONE 
+
+### Keywords
+
+    local, table-valued-function, tvf
+
+### Best Practice
+
+- 关于 local tvf 的更详细使用方法可以参照 [S3](./s3.md) tvf,唯一不同的是访问存储系统的方式不一样。
+
+- 通过 local tvf 访问 NAS 上的数据
+
+  NAS 共享存储允许同时挂载到多个节点。每个节点都可以像访问本地文件一样访问共享存储中的文件。因此,可以将 NAS 视为本地文件系统,通过 local tvf 进行访问。
+
+  当设置 `"shared_storage" = "true"` 时,Doris 会认为所指定的文件可以在任意 BE 节点访问。当使用通配符指定了一组文件时,Doris 会将访问文件的请求分发到多个 BE 节点上,这样可以利用多个节点进行分布式文件扫描,提升查询性能。
+
+
+
+
+
+
+
+
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/partitions.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/partitions.md
new file mode 100644
index 0000000000000..ce25fc0240cd3
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/partitions.md
@@ -0,0 +1,130 @@
+---
+{
+    "title": "PARTITIONS",
+    "language": "zh-CN"
+}
+---
+
+
+
+## `partitions`
+
+### Name
+
+partitions
+
+### Description
+
+表函数,生成分区临时表,可以查看某个 TABLE 的分区列表。
+
+该函数用于 From 子句中。
+
+该函数自 2.1.5 版本开始支持。
+
+#### Syntax
+
+`partitions("catalog"="","database"="","table"="")`
+
+partitions() 表结构:
+```sql
+mysql> desc function partitions("catalog"="internal","database"="zd","table"="user");
++--------------------------+---------+------+-------+---------+-------+
+| Field                    | Type    | Null | Key   | Default | Extra |
++--------------------------+---------+------+-------+---------+-------+
+| PartitionId              | BIGINT  | No   | false | NULL    | NONE  |
+| PartitionName            | TEXT    | No   | false | NULL    | NONE  |
+| VisibleVersion           | BIGINT  | No   | false | NULL    | NONE  |
+| VisibleVersionTime       | TEXT    | No   | false | NULL    | NONE  |
+| State                    | TEXT    | No   | false | NULL    | NONE  |
+| PartitionKey             | TEXT    | No   | false | NULL    | NONE  |
+| Range                    | TEXT    | No   | false | NULL    | NONE  |
+| DistributionKey          | TEXT    | No   | false | NULL    | NONE  |
+| Buckets                  | INT     | No   | false | NULL    | NONE  |
+| ReplicationNum           | INT     | No   | false | NULL    | NONE  |
+| StorageMedium            | TEXT    | No   | false | NULL    | NONE  |
+| CooldownTime             | TEXT    | No   | false | NULL    | NONE  |
+| RemoteStoragePolicy      | TEXT    | No   | false | NULL    | NONE  |
+| LastConsistencyCheckTime | TEXT    | No   | false | NULL    | NONE  |
+| DataSize                 | TEXT    | No   | false | NULL    | NONE  |
+| IsInMemory               | BOOLEAN | No   | false | NULL    | NONE  |
+| ReplicaAllocation        | TEXT    | No   | false | NULL    | NONE  |
+| IsMutable                | BOOLEAN | No   | false | NULL    | NONE  |
+| SyncWithBaseTables       | BOOLEAN | No   | false | NULL    | NONE  |
+| UnsyncTables             | TEXT    | No   | false | NULL    | NONE  |
++--------------------------+---------+------+-------+---------+-------+
+20 rows in set (0.02 sec)
+```
+
+* PartitionId:分区 id
+* PartitionName:分区名字
+* VisibleVersion:分区版本
+* VisibleVersionTime:分区版本提交时间
+* State:分区状态
+* PartitionKey:分区 key
+* Range:分区范围
+* DistributionKey:分布 key
+* Buckets:分桶数量
+* ReplicationNum:副本数
+* StorageMedium:存储介质
+* CooldownTime:cooldown 时间
+* RemoteStoragePolicy:远程存储策略
+* LastConsistencyCheckTime:上次一致性检查时间
+* DataSize:数据大小
+* IsInMemory:是否存放在内存中
+* ReplicaAllocation:分布策略
+* IsMutable:是否可变
+* SyncWithBaseTables:是否和基表数据同步(针对异步物化视图的分区)
+* UnsyncTables:和哪个基表数据不同步(针对异步物化视图的分区)
+
+```sql
+mysql> desc function partitions("catalog"="hive","database"="zdtest","table"="com2");
++-----------+------+------+-------+---------+-------+
+| Field     | Type | Null | Key   | Default | Extra |
++-----------+------+------+-------+---------+-------+
+| Partition | TEXT | No   | false | NULL    | NONE  |
++-----------+------+------+-------+---------+-------+
+1 row in set (0.11 sec)
+```
+
+* Partition:分区名字
+
+### Example
+
+1. 查看 internal CATALOG 下 db1 的 table1 的分区列表
+
+```sql
+mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1");
+```
+
+2. 查看 table1 下的分区名称为 partition1 的分区信息
+
+```sql
+mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1";
+```
+
+3. 查看 table1 下的分区名称为 partition1 的分区 id
+
+```sql
+mysql> select PartitionId from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1";
+```
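+
+4. 统计 table1 的分区个数(partitions 生成的是一张普通的临时表,聚合、过滤等常见写法均可直接使用,此处仅作示意)
+
+```sql
+mysql> select count(*) from partitions("catalog"="internal","database"="db1","table"="table1");
+```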
+
+### Keywords
+
+    partitions
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/s3.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/s3.md
new file mode 100644
index 0000000000000..8dd80cab100ee
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/s3.md
@@ -0,0 +1,564 @@
+---
+{
+    "title": "S3",
+    "language": "zh-CN"
+}
+---
+
+
+
+## S3
+
+### Name
+
+s3
+
+### description
+
+S3 表函数(table-valued-function,tvf),可以让用户像访问关系表格式数据一样,读取并访问 S3 兼容的对象存储上的文件内容。目前支持`csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`文件格式。
+
+**语法**
+
+```sql
+s3(
+  "uri" = "..",
+  "s3.access_key" = "...",
+  "s3.secret_key" = "...",
+  "s3.region" = "...",
+  "format" = "csv",
+  "keyn" = "valuen",
+  ...
+  );
+```
+
+**参数说明**
+
+S3 TVF 中的每一个参数都是一个 `"key"="value"` 对。
+访问 S3 相关参数:
+- `uri`: (必填) 访问 S3 的 URI,S3 表函数会根据 `use_path_style` 参数来决定是否使用 Path Style 访问方式,默认为 Virtual-hosted Style 方式
+- `s3.access_key`: (必填)
+- `s3.secret_key`: (必填)
+- `s3.region`: (选填) 如果 MinIO 服务设置了其他的 Region,那么必填,否则默认使用 `us-east-1`。
+- `s3.session_token`: (选填)
+- `use_path_style`:(选填) 默认为 `false`。S3 SDK 默认使用 Virtual-hosted Style 方式。但某些对象存储系统可能没开启或不支持 Virtual-hosted Style 方式的访问,此时我们可以添加 `use_path_style` 参数来强制使用 Path Style 方式。比如 MinIO 默认情况下只允许 Path Style 访问方式,所以在访问 MinIO 时要加上 `use_path_style=true`。
+- `force_parsing_by_standard_uri`:(选填)默认 `false`。我们可以添加 `force_parsing_by_standard_uri` 参数来强制将非标准的 URI 解析为标准 URI。
+
+> 对于 AWS S3,标准 uri styles 有以下几种:
+> 1. AWS Client Style(Hadoop S3 Style):`s3://my-bucket/path/to/file?versionId=abc123&partNumber=77&partNumber=88`。
+> 2. Virtual Host Style:`https://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`。
+> 3. Path Style:`https://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`。
+>
+> 除了支持以上三种标准常见的 URI Styles,还支持其他一些 URI Styles(也许不常见,但也可能遇到):
+> 1. Virtual Host AWS Client (Hadoop S3) Mixed Style:
+> `s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`
+> 2.
Path AWS Client (Hadoop S3) Mixed Style: +> `s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88` +> +> 详细使用案例可以参考最下方 Best Practice。 + +文件格式参数: +- `format`:(必填) 目前支持 `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` +- `column_separator`:(选填) 列分割符,默认为`\t`。 +- `line_delimiter`:(选填) 行分割符,默认为`\n`。 +- `compress_type`: (选填) 目前支持 `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`。默认值为 `UNKNOWN`, 将会根据 `uri` 的后缀自动推断类型。 + +下面 6 个参数是用于 JSON 格式的导入,具体使用方法可以参照:[Json Load](../../../data-operate/import/import-way/load-json-format.md) + +- `read_json_by_line`: (选填) 默认为 `"true"` +- `strip_outer_array`: (选填) 默认为 `"false"` +- `json_root`: (选填) 默认为空 +- `jsonpaths`: (选填) 默认为空 +- `num_as_string`: (选填) 默认为 `false` +- `fuzzy_parse`: (选填) 默认为 `false` + +下面 2 个参数是用于 CSV 格式的导入 + +- `trim_double_quotes`:布尔类型,选填,默认值为 `false`,为 `true` 时表示裁剪掉 CSV 文件每个字段最外层的双引号 +- `skip_lines`:整数类型,选填,默认值为 0,含义为跳过 CSV 文件的前几行。当设置 format 设置为 `csv_with_names` 或 `csv_with_names_and_types` 时,该参数会失效 + +其他参数: +- `path_partition_keys`:(选填)指定文件路径中携带的分区列名,例如 `/path/to/city=beijing/date="2023-07-09"`, 则填写 `path_partition_keys="city,date"`,将会自动从路径中读取相应列名和列值进行导入。 +- `resource`:(选填)指定 Resource 名,S3 TVF 可以利用已有的 S3 Resource 来直接访问 S3。创建 S3 Resource 的方法可以参照 [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md)。该功能自 2.1.4 版本开始支持。 + +:::tip 注意 +直接查询 TVF 或基于该 TVF 创建 View ,需要拥有该 Resource 的 USAGE 权限,查询基于 TVF 创建的 View ,只需要该 View 的 SELECT 权限。 +::: + +### Example + +读取并访问 S3 兼容的对象存储上的 CSV 格式文件 + +```sql +select * from s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "csv", + "use_path_style" = "true") order by c1; +``` + + +可以配合 `desc function` 使用 + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "csv", + "use_path_style" = "true"); +``` + +### Keywords + + S3, table-valued-function, TVF + +### Best Practice + +**不同 url schema 的写法** +http:// 、https:// 使用示例: +```sql +// 注意URI Bucket写法以及`use_path_style`参数设置,HTTP 同理。 +// 由于设置了 `"use_path_style"="true"`, 所以将采用 Path Style 的方式访问 S3。 +select * from s3( + "uri" = "https://endpoint/bucket/file/student.csv", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="true"); + +// 注意 URI Bucket写法以及use_path_style参数设置,http同理。 +// 由于设置了 `"use_path_style"="false"`, 所以将采用 Virtual-hosted Style 方式访问 S3。 +select * from s3( + "uri" = "https://bucket.endpoint/bucket/file/student.csv", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="false"); + +// 阿里云 OSS 和腾讯云 COS 采用 Virtual-hosted Style 方式访问 S3。 +// OSS +select * from s3( + "uri" = "http://example-bucket.oss-cn-beijing.aliyuncs.com/your-folder/file.parquet", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "s3.region" = "oss-cn-beijing", + "format" = "parquet", + "use_path_style" = "false"); +// COS +select * from s3( + "uri" = "https://example-bucket.cos.ap-hongkong.myqcloud.com/your-folder/file.parquet", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "s3.region" = "ap-hongkong", + "format" = "parquet", + "use_path_style" = "false"); + +// MinIO +select * from s3( + "uri" = "s3://bucket/file.csv", + "s3.endpoint" = "http://172.21.0.101:9000", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "s3.region" = "us-east-1", + "format" = "csv" +); + +// 百度云 BOS 采用兼容 S3 协议的 Virtual-hosted 
Style 方式访问 S3。 +// BOS +select * from s3( + "uri" = "https://example-bucket.s3.bj.bcebos.com/your-folder/file.parquet", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "s3.region" = "bj", + "format" = "parquet", + "use_path_style" = "false"); +``` + +s3:// 使用示例: + +```sql +// 注意 URI Bucket 写法, 无需设置 `use_path_style` 参数。 +// 将采用 Virtual-hosted Style 方式访问 S3。 +select * from s3( + "uri" = "s3://bucket/file/student.csv", + "s3.endpoint"= "endpont", + "s3.region"= "region", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv"); +``` + +其它支持的 URI 风格示例: + +```sql +// Virtual Host AWS Client (Hadoop S3) Mixed Style。通过设置 `use_path_style = false` 以及 `force_parsing_by_standard_uri = true` 来使用。 +select * from s3( + "URI" = "s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="false", + "force_parsing_by_standard_uri"="true"); + +// Path AWS Client (Hadoop S3) Mixed Style。通过设置 `use_path_style = true` 以及 `force_parsing_by_standard_uri = true` 来使用。 +select * from s3( + "URI" = "s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="true", + "force_parsing_by_standard_uri"="true"); +``` + + +**CSV format** +由于 S3 table-valued-function 事先并不知道 Table Schema,所以会先读一遍文件来解析出 Table Schema。 + +`csv` 格式:S3 table-valued-function 读取 S3 上的文件并当作 CSV 文件来处理,读取文件中的第一行用于解析 Table Schema。文件第一行的列个数 `n` 将作为 Table Schema 的列个数,Table Schema 的列名则自动取名为 `c1, c2, ..., cn` ,列类型都设置为 `String`, 举例: + +student1.csv 文件内容为: + +``` +1,ftw,12 +2,zs,18 +3,ww,20 +``` + +使用 S3 TVF + +```sql +MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv", +-> "use_path_style" = "true") order by c1; ++------+------+------+ +| c1 | c2 | c3 | ++------+------+------+ +| 1 | ftw | 12 | +| 2 | zs | 18 | +| 3 | ww | 20 | ++------+------+------+ +``` + +可以配合 `desc function S3()` 来查看 Table Schema + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv", +-> "use_path_style" = "true"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| c1 | TEXT | Yes | false | NULL | NONE | +| c2 | TEXT | Yes | false | NULL | NONE | +| c3 | TEXT | Yes | false | NULL | NONE | ++-------+------+------+-------+---------+-------+ +``` + +**csv_with_names format** +`csv_with_names` 格式:解析文件的第一行作为 Table Schema 的列个数和列名,列类型则都设置为 `String`, 举例: + +student_with_names.csv 文件内容为 + +``` +id,name,age +1,ftw,12 +2,zs,18 +3,ww,20 +``` + +使用 S3 tvf + +```sql +MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student_with_names.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names", +-> "use_path_style" = "true") order by id; ++------+------+------+ +| id | name | age | ++------+------+------+ +| 1 | ftw | 12 | +| 2 | zs | 18 | +| 3 | ww | 20 | ++------+------+------+ +``` + +同样配合 `desc function S3()` 可查看 Table Schema + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student_with_names.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" 
= "minioadmin", +-> "format" = "csv_with_names", +-> "use_path_style" = "true"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| id | TEXT | Yes | false | NULL | NONE | +| name | TEXT | Yes | false | NULL | NONE | +| age | TEXT | Yes | false | NULL | NONE | ++-------+------+------+-------+---------+-------+ +``` + +**csv_with_names_and_types foramt** + +`csv_with_names_and_types` 格式:目前暂不支持从 CSV 文件中解析出 Column Type。使用该 Format 时,S3 TVF 会解析文件的第一行作为 Table Schema 的列个数和列名,列类型则都设置为 String,同时将忽略该文件的第二行。 + +`student_with_names_and_types.csv` 文件内容为 + +``` +id,name,age +INT,STRING,INT +1,ftw,12 +2,zs,18 +3,ww,20 +``` + +使用 S3 TVF + +```sql +MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student_with_names_and_types.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names_and_types", +-> "use_path_style" = "true") order by id; ++------+------+------+ +| id | name | age | ++------+------+------+ +| 1 | ftw | 12 | +| 2 | zs | 18 | +| 3 | ww | 20 | ++------+------+------+ +``` + +同样配合 `desc function S3()` 可查看 Table Schema + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student_with_names_and_types.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names_and_types", +-> "use_path_style" = "true"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| id | TEXT | Yes | false | NULL | NONE | +| name | TEXT | Yes | false | NULL | NONE | +| age | TEXT | Yes | false | NULL | NONE | ++-------+------+------+-------+---------+-------+ +``` + +**JSON format** + +`json` 格式:JSON 格式涉及到较多的可选参数,各个参数的意义可以参考:[Json Load](../../../data-operate/import/import-way/load-json-format.md)。S3 TVF 查询 JSON 格式文件时根据 `json_root` 和 `jsonpaths` 参数定位到一个 JSON 对象,将该对象的中的 `key` 作为 Table Schema 的列名,列类型都设置为 String。举例: + +data.json 文件 + +``` +[{"id":1, "name":"ftw", "age":18}] +[{"id":2, "name":"xxx", "age":17}] +[{"id":3, "name":"yyy", "age":19}] +``` + +使用 S3 TVF 查询 + +```sql +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/data.json", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "json", + "strip_outer_array" = "true", + "read_json_by_line" = "true", + "use_path_style"="true"); ++------+------+------+ +| id | name | age | ++------+------+------+ +| 1 | ftw | 18 | +| 2 | xxx | 17 | +| 3 | yyy | 19 | ++------+------+------+ + +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/data.json", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "json", + "strip_outer_array" = "true", + "jsonpaths" = "[\"$.id\", \"$.age\"]", + "use_path_style"="true"); ++------+------+ +| id | age | ++------+------+ +| 1 | 18 | +| 2 | 17 | +| 3 | 19 | ++------+------+ +``` + +**Parquet format** + +`parquet` 格式:S3 TVF 支持从 Parquet 文件中解析出 Table Schema 的列名、列类型。举例: + +```sql +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/test.snappy.parquet", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "parquet", + "use_path_style"="true") limit 5; ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| p_partkey | p_name | p_mfgr | p_brand | 
p_type | p_size | p_container | p_retailprice | p_comment | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| 1 | goldenrod lavender spring chocolate lace | Manufacturer#1 | Brand#13 | PROMO BURNISHED COPPER | 7 | JUMBO PKG | 901 | ly. slyly ironi | +| 2 | blush thistle blue yellow saddle | Manufacturer#1 | Brand#13 | LARGE BRUSHED BRASS | 1 | LG CASE | 902 | lar accounts amo | +| 3 | spring green yellow purple cornsilk | Manufacturer#4 | Brand#42 | STANDARD POLISHED BRASS | 21 | WRAP CASE | 903 | egular deposits hag | +| 4 | cornflower chocolate smoke green pink | Manufacturer#3 | Brand#34 | SMALL PLATED BRASS | 14 | MED DRUM | 904 | p furiously r | +| 5 | forest brown coral puff cream | Manufacturer#3 | Brand#32 | STANDARD POLISHED TIN | 15 | SM PKG | 905 | wake carefully | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +``` + +```sql +MySQL [(none)]> desc function s3( + "uri" = "http://127.0.0.1:9312/test2/test.snappy.parquet", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "parquet", + "use_path_style"="true"); ++---------------+--------------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++---------------+--------------+------+-------+---------+-------+ +| p_partkey | INT | Yes | false | NULL | NONE | +| p_name | TEXT | Yes | false | NULL | NONE | +| p_mfgr | TEXT | Yes | false | NULL | NONE | +| p_brand | TEXT | Yes | false | NULL | NONE | +| p_type | TEXT | Yes | false | NULL | NONE | +| p_size | INT | Yes | false | NULL | NONE | +| p_container | TEXT | Yes | false | NULL | NONE | +| p_retailprice | DECIMAL(9,0) | Yes | false | NULL | NONE | +| p_comment | TEXT | Yes | false | NULL | NONE | ++---------------+--------------+------+-------+---------+-------+ +``` + +**orc format** + +`orc` 格式:和 `parquet` format 使用方法一致,将 `format` 参数设置为 `orc`。 + +```sql +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/test.snappy.orc", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "orc", + "use_path_style"="true") limit 5; ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| p_partkey | p_name | p_mfgr | p_brand | p_type | p_size | p_container | p_retailprice | p_comment | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| 1 | goldenrod lavender spring chocolate lace | Manufacturer#1 | Brand#13 | PROMO BURNISHED COPPER | 7 | JUMBO PKG | 901 | ly. 
slyly ironi | +| 2 | blush thistle blue yellow saddle | Manufacturer#1 | Brand#13 | LARGE BRUSHED BRASS | 1 | LG CASE | 902 | lar accounts amo | +| 3 | spring green yellow purple cornsilk | Manufacturer#4 | Brand#42 | STANDARD POLISHED BRASS | 21 | WRAP CASE | 903 | egular deposits hag | +| 4 | cornflower chocolate smoke green pink | Manufacturer#3 | Brand#34 | SMALL PLATED BRASS | 14 | MED DRUM | 904 | p furiously r | +| 5 | forest brown coral puff cream | Manufacturer#3 | Brand#32 | STANDARD POLISHED TIN | 15 | SM PKG | 905 | wake carefully | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +``` +**avro format** + +`avro` 格式:S3 TVF 支持从 avro 文件中解析出 Table Schema 的列名、列类型。举例: + +```sql +select * from s3( + "uri" = "http://127.0.0.1:9312/test2/person.avro", + "ACCESS_KEY" = "ak", + "SECRET_KEY" = "sk", + "FORMAT" = "avro"); ++--------+--------------+-------------+-----------------+ +| name | boolean_type | double_type | long_type | ++--------+--------------+-------------+-----------------+ +| Alyssa | 1 | 10.0012 | 100000000221133 | +| Ben | 0 | 5555.999 | 4009990000 | +| lisi | 0 | 5992225.999 | 9099933330 | ++--------+--------------+-------------+-----------------+ +``` + +**URI 包含通配符** + +URI 可以使用通配符来读取多个文件。注意:如果使用通配符要保证各个文件的格式是一致的 (尤其是 `csv`/`csv_with_names`/`csv_with_names_and_types` 算做不同的格式),S3 TVF 用第一个文件来解析出 Table Schema。 +如下两个 CSV 文件: + +``` +// file1.csv +1,aaa,18 +2,qqq,20 +3,qwe,19 + +// file2.csv +5,cyx,19 +6,ftw,21 +``` + +可以在 URI 上使用通配符来导入。 + +```sql +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/file*.csv", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "csv", + "use_path_style"="true"); ++------+------+------+ +| c1 | c2 | c3 | ++------+------+------+ +| 1 | aaa | 18 | +| 2 | qqq | 20 | +| 3 | qwe | 19 | +| 5 | cyx | 19 | +| 6 | ftw | 21 | ++------+------+------+ +``` + +**配合 `insert into` 和 `cast` 使用 `S3` TVF** + +```sql +// 创建 Doris 内部表 +CREATE TABLE IF NOT EXISTS ${testTable} + ( + id int, + name varchar(50), + age int + ) + COMMENT "my first table" + DISTRIBUTED BY HASH(id) BUCKETS 32 + PROPERTIES("replication_num" = "1"); + +// 使用 S3 插入数据 +insert into ${testTable} (id,name,age) +select cast (id as INT) as id, name, cast (age as INT) as age +from s3( + "uri" = "${uri}", + "s3.access_key"= "${ak}", + "s3.secret_key" = "${sk}", + "format" = "${format}", + "strip_outer_array" = "true", + "read_json_by_line" = "true", + "use_path_style" = "true"); +``` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/workload-group.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/workload-group.md similarity index 94% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/workload-group.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/workload-group.md index 19804b00c152e..28834e806fecc 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/workload-group.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/sql-manual/sql-functions/table-valued-functions/workload-group.md @@ -31,7 +31,7 @@ under the License. 
workload_groups :::caution -自 2.1.1 起,此表函数移到 information_schema.workload_groups 表。 +已废弃。自 2.1.1 起,此表函数移到 information_schema.workload_groups 表。 ::: ### description @@ -69,4 +69,4 @@ mysql> select * from workload_groups()\G ### keywords - workload_groups \ No newline at end of file + workload_groups diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1.json b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1.json index 57d4b1852e9aa..27708e2bd1db3 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1.json +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1.json @@ -307,6 +307,10 @@ "message": "表函数", "description": "The label for category Table Functions in sidebar docs" }, + "sidebar.docs.category.Table Valued Functions": { + "message": "表值函数", + "description": "The label for category Table Valued Functions in sidebar docs" + }, "sidebar.docs.category.Analytic(Window) Functions": { "message": "分析(窗口)函数", "description": "The label for category Analytic(Window) Functions in sidebar docs" @@ -415,4 +419,4 @@ "message": "使用教程", "description": "The label for category BI and Database IDE in sidebar docs" } -} \ No newline at end of file +} diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-functions/hdfs.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-functions/hdfs.md index ea8e6343ad99f..1013a2146117d 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-functions/hdfs.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-functions/hdfs.md @@ -85,6 +85,10 @@ hdfs( - `path_partition_keys`:(选填)指定文件路径中携带的分区列名,例如/path/to/city=beijing/date="2023-07-09", 则填写`path_partition_keys="city,date"`,将会自动从路径中读取相应列名和列值进行导入。 - `resource`:(选填)指定 Resource 名,HDFS TVF 可以利用已有的 HFDS Resource 来直接访问 HDFS。创建 HDFS Resource 的方法可以参照 [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md)。该功能自 2.1.4 版本开始支持。 +:::tip 注意 +直接查询 TVF 或基于该 TVF 创建 VIEW ,需要拥有该 RESOURCE 的 USAGE 权限,查询基于 TVF 创建的 VIEW ,只需要该 VIEW 的 SELECT 权限 +::: + ### Examples 读取并访问 HDFS 存储上的 CSV 格式文件 diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-functions/partitions.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-functions/partitions.md new file mode 100644 index 0000000000000..eb3705cd90310 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-functions/partitions.md @@ -0,0 +1,128 @@ +--- +{ + "title": "PARTITIONS", + "language": "zh-CN" +} +--- + + + +## `partitions` + +### Name + +partitions + +### description + +表函数,生成分区临时表,可以查看某个 TABLE 的分区列表。 + +该函数用于 From 子句中。 + +#### syntax + +`partitions("catalog"="","database"="","table"="")` + +partitions()表结构: +```sql +mysql> desc function partitions("catalog"="internal","database"="zd","table"="user"); ++--------------------------+---------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++--------------------------+---------+------+-------+---------+-------+ +| PartitionId | BIGINT | No | false | NULL | NONE | +| PartitionName | TEXT | No | false | NULL | NONE | +| VisibleVersion | BIGINT | No | false | NULL | NONE | +| VisibleVersionTime | TEXT | No | false | NULL | NONE | +| State | TEXT | No | false | NULL | NONE | +| PartitionKey | TEXT | No | false | NULL | NONE | +| Range | TEXT | No | false | NULL 
| NONE | +| DistributionKey | TEXT | No | false | NULL | NONE | +| Buckets | INT | No | false | NULL | NONE | +| ReplicationNum | INT | No | false | NULL | NONE | +| StorageMedium | TEXT | No | false | NULL | NONE | +| CooldownTime | TEXT | No | false | NULL | NONE | +| RemoteStoragePolicy | TEXT | No | false | NULL | NONE | +| LastConsistencyCheckTime | TEXT | No | false | NULL | NONE | +| DataSize | TEXT | No | false | NULL | NONE | +| IsInMemory | BOOLEAN | No | false | NULL | NONE | +| ReplicaAllocation | TEXT | No | false | NULL | NONE | +| IsMutable | BOOLEAN | No | false | NULL | NONE | +| SyncWithBaseTables | BOOLEAN | No | false | NULL | NONE | +| UnsyncTables | TEXT | No | false | NULL | NONE | ++--------------------------+---------+------+-------+---------+-------+ +20 rows in set (0.02 sec) +``` + +* PartitionId:分区id +* PartitionName:分区名字 +* VisibleVersion:分区版本 +* VisibleVersionTime:分区版本提交时间 +* State:分区状态 +* PartitionKey:分区key +* Range:分区范围 +* DistributionKey:分布key +* Buckets:分桶数量 +* ReplicationNum:副本数 +* StorageMedium:存储介质 +* CooldownTime:cooldown时间 +* RemoteStoragePolicy:远程存储策略 +* LastConsistencyCheckTime:上次一致性检查时间 +* DataSize:数据大小 +* IsInMemory:是否存在内存 +* ReplicaAllocation:分布策略 +* IsMutable:是否可变 +* SyncWithBaseTables:是否和基表数据同步(针对异步物化视图的分区) +* UnsyncTables:和哪个基表数据不同步(针对异步物化视图的分区) + +```sql +mysql> desc function partitions("catalog"="hive","database"="zdtest","table"="com2"); ++-----------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-----------+------+------+-------+---------+-------+ +| Partition | TEXT | No | false | NULL | NONE | ++-----------+------+------+-------+---------+-------+ +1 row in set (0.11 sec) +``` + +* Partition:分区名字 + +### example + +1. 查看 internal CATALOG 下 db1 的 table1 的分区列表 + +```sql +mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1"); +``` + +2. 查看 table1 下的分区名称为 partition1 的分区信息 + +```sql +mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1"; +``` + +3. 
查看 table1 下的分区名称为 partition1 的分区 id + +```sql +mysql> select PartitionId from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1"; +``` + +### keywords + + partitions diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-functions/s3.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-functions/s3.md index b5c718d271b91..8dd80cab100ee 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-functions/s3.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-functions/s3.md @@ -97,6 +97,10 @@ S3 TVF 中的每一个参数都是一个 `"key"="value"` 对。 - `path_partition_keys`:(选填)指定文件路径中携带的分区列名,例如 `/path/to/city=beijing/date="2023-07-09"`, 则填写 `path_partition_keys="city,date"`,将会自动从路径中读取相应列名和列值进行导入。 - `resource`:(选填)指定 Resource 名,S3 TVF 可以利用已有的 S3 Resource 来直接访问 S3。创建 S3 Resource 的方法可以参照 [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md)。该功能自 2.1.4 版本开始支持。 +:::tip 注意 +直接查询 TVF 或基于该 TVF 创建 View ,需要拥有该 Resource 的 USAGE 权限,查询基于 TVF 创建的 View ,只需要该 View 的 SELECT 权限。 +::: + ### Example 读取并访问 S3 兼容的对象存储上的 CSV 格式文件 diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/active_queries.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/active_queries.md similarity index 96% rename from i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/active_queries.md rename to i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/active_queries.md index b74e2acd38c8a..c6fc46d6e440f 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/table-functions/active_queries.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/active_queries.md @@ -31,7 +31,7 @@ under the License. 
active_queries :::caution -自 2.1.1 起,此表函数移到 information_schema.active_queries 表。 +已废弃。自 2.1.1 起,此表函数移到 information_schema.active_queries 表。 ::: ### description diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/backends.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/backends.md new file mode 100644 index 0000000000000..b87bbfe36aa7f --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/backends.md @@ -0,0 +1,111 @@ +--- +{ + "title": "BACKENDS", + "language": "zh-CN" +} +--- + + + +## `backends` + +### Name + +backends + +### description + +表函数,生成 backends 临时表,可以查看当前 doris 集群中的 BE 节点信息。 + +该函数用于 from 子句中。 + +#### syntax +`backends()` + +backends() 表结构: +``` +mysql> desc function backends(); ++-------------------------+---------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------------+---------+------+-------+---------+-------+ +| BackendId | BIGINT | No | false | NULL | NONE | +| Host | TEXT | No | false | NULL | NONE | +| HeartbeatPort | INT | No | false | NULL | NONE | +| BePort | INT | No | false | NULL | NONE | +| HttpPort | INT | No | false | NULL | NONE | +| BrpcPort | INT | No | false | NULL | NONE | +| LastStartTime | TEXT | No | false | NULL | NONE | +| LastHeartbeat | TEXT | No | false | NULL | NONE | +| Alive | BOOLEAN | No | false | NULL | NONE | +| SystemDecommissioned | BOOLEAN | No | false | NULL | NONE | +| TabletNum | BIGINT | No | false | NULL | NONE | +| DataUsedCapacity | BIGINT | No | false | NULL | NONE | +| AvailCapacity | BIGINT | No | false | NULL | NONE | +| TotalCapacity | BIGINT | No | false | NULL | NONE | +| UsedPct | DOUBLE | No | false | NULL | NONE | +| MaxDiskUsedPct | DOUBLE | No | false | NULL | NONE | +| RemoteUsedCapacity | BIGINT | No | false | NULL | NONE | +| Tag | TEXT | No | false | NULL | NONE | +| ErrMsg | TEXT | No | false | NULL | NONE | +| Version | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| HeartbeatFailureCounter | INT | No | false | NULL | NONE | +| NodeRole | TEXT | No | false | NULL | NONE | ++-------------------------+---------+------+-------+---------+-------+ +23 rows in set (0.002 sec) +``` + +`backends()` tvf 展示出来的信息基本与 `show backends` 语句展示出的信息一致,但是 `backends()` tvf 的各个字段类型更加明确,且可以利用 tvf 生成的表去做过滤、join 等操作。 + +对 `backends()` tvf 信息展示进行了鉴权,与 `show backends` 行为保持一致,要求用户具有 ADMIN/OPERATOR 权限。 + +### example +``` +mysql> select * from backends()\G +*************************** 1. 
row *************************** + BackendId: 10002 + Host: 10.xx.xx.90 + HeartbeatPort: 9053 + BePort: 9063 + HttpPort: 8043 + BrpcPort: 8069 + LastStartTime: 2023-06-15 16:51:02 + LastHeartbeat: 2023-06-15 17:09:58 + Alive: 1 + SystemDecommissioned: 0 + TabletNum: 21 + DataUsedCapacity: 0 + AvailCapacity: 5187141550081 + TotalCapacity: 7750977622016 + UsedPct: 33.077583202570978 + MaxDiskUsedPct: 33.077583202583881 + RemoteUsedCapacity: 0 + Tag: {"location" : "default"} + ErrMsg: + Version: doris-0.0.0-trunk-4b18cde0c7 + Status: {"lastSuccessReportTabletsTime":"2023-06-15 17:09:02","lastStreamLoadTime":-1,"isQueryDisabled":false,"isLoadDisabled":false} +HeartbeatFailureCounter: 0 + NodeRole: mix +1 row in set (0.038 sec) +``` + +### keywords + + backends \ No newline at end of file diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/catalogs.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/catalogs.md new file mode 100644 index 0000000000000..c7c5c964c0433 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/catalogs.md @@ -0,0 +1,92 @@ +--- +{ + "title": "CATALOGS", + "language": "zh-CN" +} +--- + + + +## `catalogs` + +### Name + + +catalogs + + +### description + +表函数,生成 catalogs 临时表,可以查看当前doris中的创建的 catalogs 信息。 + +该函数用于 from 子句中。 + +#### syntax + +`catalogs()` + +catalogs()表结构: +``` +mysql> desc function catalogs(); ++-------------+--------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------+--------+------+-------+---------+-------+ +| CatalogId | BIGINT | No | false | NULL | NONE | +| CatalogName | TEXT | No | false | NULL | NONE | +| CatalogType | TEXT | No | false | NULL | NONE | +| Property | TEXT | No | false | NULL | NONE | +| Value | TEXT | No | false | NULL | NONE | ++-------------+--------+------+-------+---------+-------+ +5 rows in set (0.04 sec) +``` + +`catalogs()` tvf展示的信息是综合了 `show catalogs` 与 `show catalog xxx` 语句的结果。 + +可以利用tvf生成的表去做过滤、join等操作。 + + + +### example + +``` +mysql> select * from catalogs(); ++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+ +| CatalogId | CatalogName | CatalogType | Property | Value | ++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+ +| 16725 | hive | hms | dfs.client.failover.proxy.provider.HANN | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider | +| 16725 | hive | hms | dfs.ha.namenodes.HANN | nn1,nn2 | +| 16725 | hive | hms | create_time | 2023-07-13 16:24:38.968 | +| 16725 | hive | hms | ipc.client.fallback-to-simple-auth-allowed | true | +| 16725 | hive | hms | dfs.namenode.rpc-address.HANN.nn1 | nn1_host:rpc_port | +| 16725 | hive | hms | hive.metastore.uris | thrift://127.0.0.1:7004 | +| 16725 | hive | hms | dfs.namenode.rpc-address.HANN.nn2 | nn2_host:rpc_port | +| 16725 | hive | hms | type | hms | +| 16725 | hive | hms | dfs.nameservices | HANN | +| 0 | internal | internal | NULL | NULL | +| 16726 | es | es | create_time | 2023-07-13 16:24:44.922 | +| 16726 | es | es | type | es | +| 16726 | es | es | hosts | http://127.0.0.1:9200 | 
++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+ +13 rows in set (0.01 sec) +``` + +### keywords + + catalogs diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/frontends.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/frontends.md new file mode 100644 index 0000000000000..463fd80f52828 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/frontends.md @@ -0,0 +1,101 @@ +--- +{ + "title": "FRONTENDS", + "language": "zh-CN" +} +--- + + + +## `frontends` + +### Name + +frontends + +### description + +表函数,生成 frontends 临时表,可以查看当前 doris 集群中的 FE 节点信息。 + +该函数用于 from 子句中。 + +#### syntax +`frontends()` + +frontends() 表结构: +``` +mysql> desc function frontends(); ++-------------------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------+------+------+-------+---------+-------+ +| Name | TEXT | No | false | NULL | NONE | +| Host | TEXT | No | false | NULL | NONE | +| EditLogPort | TEXT | No | false | NULL | NONE | +| HttpPort | TEXT | No | false | NULL | NONE | +| QueryPort | TEXT | No | false | NULL | NONE | +| RpcPort | TEXT | No | false | NULL | NONE | +| ArrowFlightSqlPort| TEXT | No | false | NULL | NONE | +| Role | TEXT | No | false | NULL | NONE | +| IsMaster | TEXT | No | false | NULL | NONE | +| ClusterId | TEXT | No | false | NULL | NONE | +| Join | TEXT | No | false | NULL | NONE | +| Alive | TEXT | No | false | NULL | NONE | +| ReplayedJournalId | TEXT | No | false | NULL | NONE | +| LastHeartbeat | TEXT | No | false | NULL | NONE | +| IsHelper | TEXT | No | false | NULL | NONE | +| ErrMsg | TEXT | No | false | NULL | NONE | +| Version | TEXT | No | false | NULL | NONE | +| CurrentConnected | TEXT | No | false | NULL | NONE | ++-------------------+------+------+-------+---------+-------+ +17 rows in set (0.022 sec) +``` + +`frontends()` tvf 展示出来的信息基本与 `show frontends` 语句展示出的信息一致,但是 `frontends()` tvf 的各个字段类型更加明确,且可以利用 tvf 生成的表去做过滤、join 等操作。 + +对 `frontends()` tvf 信息展示进行了鉴权,与 `show frontends` 行为保持一致,要求用户具有 ADMIN/OPERATOR 权限。 + +### example +``` +mysql> select * from frontends()\G +*************************** 1. 
row ***************************
+              Name: fe_5fa8bf19_fd6b_45cb_89c5_25a5ebc45582
+              Host: 10.xx.xx.14
+       EditLogPort: 9013
+          HttpPort: 8034
+         QueryPort: 9033
+           RpcPort: 9023
+ArrowFlightSqlPort: 9040
+              Role: FOLLOWER
+          IsMaster: true
+         ClusterId: 1258341841
+              Join: true
+             Alive: true
+ ReplayedJournalId: 186
+     LastHeartbeat: 2023-06-15 16:53:12
+          IsHelper: true
+            ErrMsg:
+           Version: doris-0.0.0-trunk-4b18cde0c7
+  CurrentConnected: Yes
+1 row in set (0.060 sec)
+```
+
+### keywords
+
+    frontends
\ No newline at end of file
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/frontends_disks.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/frontends_disks.md
new file mode 100644
index 0000000000000..835d1eed135e5
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/frontends_disks.md
@@ -0,0 +1,86 @@
+---
+{
+    "title": "frontends_disks",
+    "language": "zh-CN"
+}
+---
+
+
+
+## `frontends_disks`
+
+### Name
+
+frontends_disks
+
+### description
+
+表函数,生成 frontends_disks 临时表,可以查看当前 doris 集群中的 FE 节点的磁盘信息。
+
+该函数用于 from 子句中。
+
+#### syntax
+`frontends_disks()`
+
+frontends_disks() 表结构:
+```
+mysql> desc function frontends_disks();
++-------------+------+------+-------+---------+-------+
+| Field       | Type | Null | Key   | Default | Extra |
++-------------+------+------+-------+---------+-------+
+| Name        | TEXT | No   | false | NULL    | NONE  |
+| Host        | TEXT | No   | false | NULL    | NONE  |
+| DirType     | TEXT | No   | false | NULL    | NONE  |
+| Dir         | TEXT | No   | false | NULL    | NONE  |
+| Filesystem  | TEXT | No   | false | NULL    | NONE  |
+| Capacity    | TEXT | No   | false | NULL    | NONE  |
+| Used        | TEXT | No   | false | NULL    | NONE  |
+| Available   | TEXT | No   | false | NULL    | NONE  |
+| UseRate     | TEXT | No   | false | NULL    | NONE  |
+| MountOn     | TEXT | No   | false | NULL    | NONE  |
++-------------+------+------+-------+---------+-------+
+10 rows in set (0.14 sec)
+```
+
+`frontends_disks()` tvf 展示出来的信息基本与 `show frontends disks` 语句展示出的信息一致,但是 `frontends_disks()` tvf 的各个字段类型更加明确,且可以利用 tvf 生成的表去做过滤、join 等操作。
+
+对 `frontends_disks()` tvf 信息展示进行了鉴权,与 `show frontends disks` 行为保持一致,要求用户具有 ADMIN/OPERATOR 权限。
+
+### example
+```
+mysql> select * from frontends_disks()\G
+*************************** 1. row ***************************
+      Name: fe_fe1d5bd9_d1e5_4ccc_9b03_ca79b95c9941
+      Host: 172.XX.XX.1
+   DirType: log
+       Dir: /data/doris/fe-github/log
+Filesystem: /dev/sdc5
+  Capacity: 366G
+      Used: 119G
+ Available: 228G
+   UseRate: 35%
+   MountOn: /data
+......
+12 rows in set (0.03 sec)
+```
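+
+也可以像普通表一样做进一步过滤,例如只关注某一类目录的磁盘占用(下面的 `DirType = 'log'` 取自上方示例输出,实际取值以环境返回结果为准):
+
+```sql
+select Name, Dir, Capacity, Used, UseRate
+from frontends_disks()
+where DirType = 'log';
+```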
+
+### keywords
+
+    frontends_disks
\ No newline at end of file
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/hdfs.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/hdfs.md
new file mode 100644
index 0000000000000..d71923b184e1c
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/hdfs.md
@@ -0,0 +1,151 @@
+---
+{
+    "title": "HDFS",
+    "language": "zh-CN"
+}
+---
+
+
+
+## HDFS
+
+### Description
+
+HDFS 表函数(table-valued-function,tvf),可以让用户像访问关系表格式数据一样,读取并访问 HDFS 上的文件内容。目前支持`csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`文件格式。
+
+#### syntax
+```sql
+hdfs(
+  "uri" = "..",
+  "fs.defaultFS" = "...",
+  "hadoop.username" = "...",
+  "format" = "csv",
+  "keyn" = "valuen"
+  ...
+  );
+```
+
+**参数说明**
+
+访问 HDFS 相关参数:
+- `uri`:(必填)访问 HDFS 的 uri。如果 uri 路径不存在或文件都是空文件,HDFS TVF 将返回空集合。
+- `fs.defaultFS`:(必填)
+- `hadoop.username`:(必填)可以是任意字符串,但不能为空
+- `hadoop.security.authentication`:(选填)
+- `hadoop.kerberos.principal`:(选填)
+- `hadoop.kerberos.keytab`:(选填)
+- `dfs.client.read.shortcircuit`:(选填)
+- `dfs.domain.socket.path`:(选填)
+
+访问 HA 模式 HDFS 相关参数:
+- `dfs.nameservices`:(选填)
+- `dfs.ha.namenodes.your-nameservices`:(选填)
+- `dfs.namenode.rpc-address.your-nameservices.your-namenode`:(选填)
+- `dfs.client.failover.proxy.provider.your-nameservices`:(选填)
+
+文件格式相关参数:
+- `format`:(必填) 目前支持 `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc/avro`
+- `column_separator`:(选填) 列分割符,默认为`\t`。
+- `line_delimiter`:(选填) 行分割符,默认为`\n`。
+- `compress_type`:(选填) 目前支持 `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`。默认值为 `UNKNOWN`,将会根据 `uri` 的后缀自动推断类型。
+
+    下面 6 个参数是用于 JSON 格式的导入,具体使用方法可以参照:[JSON Load](../../../data-operate/import/import-way/load-json-format.md)
+
+- `read_json_by_line`:(选填) 默认为 `"true"`
+- `strip_outer_array`:(选填) 默认为 `"false"`
+- `json_root`:(选填) 默认为空
+- `json_paths`:(选填) 默认为空
+- `num_as_string`:(选填) 默认为 `false`
+- `fuzzy_parse`:(选填) 默认为 `false`
+
+    下面 2 个参数用于 CSV 格式的导入:
+
+- `trim_double_quotes`:布尔类型,选填,默认值为 `false`,为 `true` 时表示裁剪掉 CSV 文件每个字段最外层的双引号
+- `skip_lines`:整数类型,选填,默认值为 0,含义为跳过 CSV 文件的前几行。当 format 设置为 `csv_with_names` 或 `csv_with_names_and_types` 时,该参数会失效
+
+其他参数:
+- `path_partition_keys`:(选填)指定文件路径中携带的分区列名,例如 /path/to/city=beijing/date="2023-07-09",则填写 `path_partition_keys="city,date"`,将会自动从路径中读取相应列名和列值进行导入。
+- `resource`:(选填)指定 Resource 名,HDFS TVF 可以利用已有的 HDFS Resource 来直接访问 HDFS。创建 HDFS Resource 的方法可以参照 [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md)。该功能自 2.1.4 版本开始支持。
+
+:::tip 注意
+直接查询 TVF 或基于该 TVF 创建 View,需要拥有该 Resource 的 USAGE 权限;查询基于 TVF 创建的 View,只需要该 View 的 SELECT 权限。
+:::
+
+### Examples
+
+读取并访问 HDFS 存储上的 CSV 格式文件
+```sql
+MySQL [(none)]> select * from hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student.csv",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "csv");
++------+---------+------+
+| c1   | c2      | c3   |
++------+---------+------+
+| 1    | alice   | 18   |
+| 2    | bob     | 20   |
+| 3    | jack    | 24   |
+| 4    | jackson | 19   |
+| 5    | liming  | 18   |
++------+---------+------+
+```
+
+读取并访问 HA 模式的 HDFS 存储上的 CSV 格式文件
+```sql
+MySQL [(none)]> select * from hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student.csv",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "csv",
+            "dfs.nameservices" = "my_hdfs",
+            "dfs.ha.namenodes.my_hdfs" = "nn1,nn2",
+            "dfs.namenode.rpc-address.my_hdfs.nn1" = "namenode01:8020",
+            "dfs.namenode.rpc-address.my_hdfs.nn2" = "namenode02:8020",
+            "dfs.client.failover.proxy.provider.my_hdfs" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
++------+---------+------+
+| c1   | c2      | c3   |
++------+---------+------+
+| 1    | alice   | 18   |
+| 2    | bob     | 20   |
+| 3    | jack    | 24   |
+| 4    | jackson | 19   |
+| 5    | liming  | 18   |
++------+---------+------+
+```
+
+可以配合 `desc function` 使用。
+
+```sql
+MySQL [(none)]> desc function hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student_with_names.csv",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "csv_with_names");
+```
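+
+与 S3 TVF 类似,HDFS TVF 也常配合 `insert into` 把 HDFS 上的文件导入内部表。下面是一个示意写法(表名 `my_table` 及其列类型均为假设,请按实际表结构调整):
+
+```sql
+-- 假设 my_table 的列为 id INT, name VARCHAR, age INT;
+-- hdfs tvf 读出的 csv 列均为 TEXT,这里 cast 成目标类型后写入
+INSERT INTO my_table
+SELECT cast(c1 AS INT), c2, cast(c3 AS INT)
+FROM hdfs(
+    "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student.csv",
+    "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+    "hadoop.username" = "doris",
+    "format" = "csv");
+```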
+
+### Keywords
+
+    HDFS, table-valued-function, TVF
+
+### Best Practice
+
+    关于 HDFS TVF 的更详细使用方法可以参照 [S3](./s3.md) TVF,唯一不同的是访问存储系统的方式不一样。
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md
new file mode 100644
index 0000000000000..30cdfb54158c8
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md
@@ -0,0 +1,97 @@
+---
+{
+"title": "ICEBERG_META",
+"language": "zh-CN"
+}
+---
+
+
+
+## iceberg_meta
+
+### Name
+
+iceberg_meta
+
+### description
+
+iceberg_meta 表函数(table-valued-function,tvf),可以用于读取 iceberg 表的各类元数据信息,如操作历史、生成的快照、文件元数据等。
+
+#### syntax
+```sql
+iceberg_meta(
+  "table" = "ctl.db.tbl",
+  "query_type" = "snapshots"
+  ...
+  );
+```
+
+**参数说明**
+
+iceberg_meta 表函数 tvf 中的每一个参数都是一个 `"key"="value"` 对。
+相关参数:
+- `table`: (必填) 完整的表名,需要按照 `目录名.库名.表名` 的格式,填写需要查看的 iceberg 表名。
+- `query_type`: (必填) 想要查看的元数据类型,目前仅支持 snapshots。
+
+### Example
+
+读取并访问 iceberg 表格式的 snapshots 元数据。
+
+```sql
+select * from iceberg_meta("table" = "ctl.db.tbl", "query_type" = "snapshots");
+```
+
+可以配合 `desc function` 使用
+
+```sql
+desc function iceberg_meta("table" = "ctl.db.tbl", "query_type" = "snapshots");
+```
+
+### Keywords
+
+    iceberg_meta, table-valued-function, tvf
+
+### Best Practice
+
+查看 iceberg 表的 snapshots
+
+```sql
+select * from iceberg_meta("table" = "iceberg_ctl.test_db.test_tbl", "query_type" = "snapshots");
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+| committed_at           | snapshot_id    | parent_id     | operation | manifest_list     | summary                      |
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+| 2022-09-20 11:14:29    | 64123452344    | -1            | append    | hdfs:/path/to/m1  | {"flink.job-id":"xxm1", ...} |
+| 2022-09-21 10:36:35    | 98865735822    | 64123452344   | overwrite | hdfs:/path/to/m2  | {"flink.job-id":"xxm2", ...} |
+| 2022-09-21 21:44:11    | 51232845315    | 98865735822   | overwrite | hdfs:/path/to/m3  | {"flink.job-id":"xxm3", ...} |
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+```
+
+根据 snapshot_id 字段筛选
+
+```sql
+select * from iceberg_meta("table" = "iceberg_ctl.test_db.test_tbl", "query_type" = "snapshots")
+where snapshot_id = 98865735822;
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+| committed_at           | snapshot_id    | parent_id     | operation | manifest_list     | summary                      |
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+| 2022-09-21 10:36:35    | 98865735822    | 64123452344   | overwrite | hdfs:/path/to/m2  | {"flink.job-id":"xxm2", ...} |
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+```
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/jobs.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/jobs.md
new file mode 100644
index 0000000000000..19885eae4d3d8
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/jobs.md
@@ -0,0 +1,146 @@
+---
+{
+    "title": "JOBS",
+    "language": "zh-CN"
+}
+---
+
+
+
+## `jobs`
+
+###
Name + +jobs + +### description + +表函数,生成任务临时表,可以查看某个任务类型中的job信息。 + +该函数用于 from 子句中。 + +该函数自 2.1.0 版本支持。 + +#### syntax + +`jobs("type"="")` + +**参数说明** + +| 参数名 | 说明 | 类型 | 是否必填 | +|:-----|:-----|:-------|:-----| +| type | 作业类型 | string | 是 | + +type 支持的类型: + +- insert:insert into 类型的任务。 +- mv:物化视图类型的任务。 +##### Insert 任务 +jobs("type"="insert")表结构: +```sql +mysql> desc function jobs("type"="insert"); ++-------------------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------+------+------+-------+---------+-------+ +| Id | TEXT | No | false | NULL | NONE | +| Name | TEXT | No | false | NULL | NONE | +| Definer | TEXT | No | false | NULL | NONE | +| ExecuteType | TEXT | No | false | NULL | NONE | +| RecurringStrategy | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| ExecuteSql | TEXT | No | false | NULL | NONE | +| CreateTime | TEXT | No | false | NULL | NONE | +| SucceedTaskCount | TEXT | No | false | NULL | NONE | +| FailedTaskCount | TEXT | No | false | NULL | NONE | +| CanceledTaskCount | TEXT | No | false | NULL | NONE | +| Comment | TEXT | No | false | NULL | NONE | ++-------------------+------+------+-------+---------+-------+ +12 rows in set (0.01 sec) +``` +* Id:job id. +* Name:job名称. +* Definer:job定义者. +* ExecuteType:执行类型 +* RecurringStrategy:循环策略 +* Status:job状态 +* ExecuteSql:执行SQL +* CreateTime:job 创建时间 +* SucceedTaskCount:成功任务数量 +* FailedTaskCount:失败任务数量 +* CanceledTaskCount:取消任务数量 +* Comment:job 注释 +##### 物化视图任务 +jobs("type"="mv")表结构: +```sql +mysql> desc function jobs("type"="mv"); ++-------------------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------+------+------+-------+---------+-------+ +| Id | TEXT | No | false | NULL | NONE | +| Name | TEXT | No | false | NULL | NONE | +| MvId | TEXT | No | false | NULL | NONE | +| MvName | TEXT | No | false | NULL | NONE | +| MvDatabaseId | TEXT | No | false | NULL | NONE | +| MvDatabaseName | TEXT | No | false | NULL | NONE | +| ExecuteType | TEXT | No | false | NULL | NONE | +| RecurringStrategy | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| CreateTime | TEXT | No | false | NULL | NONE | ++-------------------+------+------+-------+---------+-------+ +10 rows in set (0.00 sec) +``` + +* Id:job id. +* Name:job名称. +* MvId:物化视图id +* MvName:物化视图名称 +* MvDatabaseId:物化视图所属db id +* MvDatabaseName:物化视图所属db名称 +* ExecuteType:执行类型 +* RecurringStrategy:循环策略 +* Status:job状态 +* CreateTime:task创建时间 + +### example + +1. 查看所有物化视图的job + +```sql +mysql> select * from jobs("type"="mv"); +``` + +2. 查看 name 为`inner_mtmv_75043`的 job + +```sql +mysql> select * from jobs("type"="mv") where Name="inner_mtmv_75043"; +``` +3. 查看所有 insert 任务 + +```sql +mysql> select * from jobs("type"="insert"); +``` +4. 
查看 name 为`one_insert_job`的 job
+
+```sql
+mysql> select * from jobs("type"="insert") where Name='one_insert_job';
+```
+### keywords
+
+    jobs, job, insert, mv, materialized view, schedule
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/local.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/local.md
new file mode 100644
index 0000000000000..7a3d945fe69cb
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/local.md
@@ -0,0 +1,192 @@
+---
+{
+    "title": "LOCAL",
+    "language": "zh-CN"
+}
+---
+
+
+
+## local
+
+### Name
+
+local
+
+### Description
+
+Local 表函数(table-valued-function,tvf),可以让用户像访问关系表格式数据一样,读取并访问 BE 上的文件内容。目前支持`csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`文件格式。
+
+该函数需要 ADMIN 权限。
+
+#### syntax
+
+```sql
+local(
+  "file_path" = "path/to/file.txt",
+  "backend_id" = "be_id",
+  "format" = "csv",
+  "keyn" = "valuen"
+  ...
+  );
+```
+
+**参数说明**
+
+- 访问 local 文件的相关参数:
+
+    - `file_path`
+
+        (必填)待读取文件的路径,该路径是一个相对于 `user_files_secure_path` 目录的相对路径,其中 `user_files_secure_path` 参数是 [BE 的一个配置项](../../../admin-manual/config/be-config.md)。
+
+        路径中不能包含 `..`,可以使用 glob 语法进行模糊匹配,如:`logs/*.log`
+
+- 执行方式相关:
+
+    在 2.1.1 之前的版本中,Doris 仅支持指定某一个 BE 节点,读取该节点上的本地数据文件。
+
+    - `backend_id`:
+
+        文件所在的 BE id。`backend_id` 可以通过 `show backends` 命令得到。
+
+    从 2.1.2 版本开始,Doris 增加了新的参数 `shared_storage`。
+
+    - `shared_storage`
+
+        默认为 false。如果为 true,表示指定的文件存在于共享存储上(比如 NAS)。共享存储必须兼容 POSIX 文件接口,并且同时挂载在所有 BE 节点上。
+
+        当 `shared_storage` 为 true 时,可以不设置 `backend_id`,Doris 可能会利用到所有 BE 节点进行数据访问。如果设置了 `backend_id`,则仍然仅在指定 BE 节点上执行。
+
+- 文件格式相关参数:
+
+    - `format`:(必填) 目前支持 `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`
+    - `column_separator`:(选填) 列分割符,默认为`,`。
+    - `line_delimiter`:(选填) 行分割符,默认为`\n`。
+    - `compress_type`:(选填) 目前支持 `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`。默认值为 `UNKNOWN`,将会根据 `uri` 的后缀自动推断类型。
+
+- 以下参数适用于 json 格式的导入,具体使用方法可以参照:[Json Load](../../../data-operate/import/import-way/load-json-format.md)
+
+    - `read_json_by_line`:(选填) 默认为 `"true"`
+    - `strip_outer_array`:(选填) 默认为 `"false"`
+    - `json_root`:(选填) 默认为空
+    - `json_paths`:(选填) 默认为空
+    - `num_as_string`:(选填) 默认为 `false`
+    - `fuzzy_parse`:(选填) 默认为 `false`
+
+- 以下参数适用于 csv 格式的导入:
+
+    - `trim_double_quotes`:布尔类型,选填,默认值为 `false`,为 `true` 时表示裁剪掉 csv 文件每个字段最外层的双引号
+    - `skip_lines`:整数类型,选填,默认值为 0,含义为跳过 csv 文件的前几行。当 format 设置为 `csv_with_names` 或 `csv_with_names_and_types` 时,该参数会失效
+
+### Examples
+
+分析指定 BE 上的日志文件:
+
+```sql
+mysql> select * from local(
+        "file_path" = "log/be.out",
+        "backend_id" = "10006",
+        "format" = "csv")
+       where c1 like "%start_time%" limit 10;
++--------------------------------------------------------+
+| c1                                                     |
++--------------------------------------------------------+
+| start time: 2023年 08月 07日 星期一 23:20:32 CST       |
+| start time: 2023年 08月 07日 星期一 23:32:10 CST       |
+| start time: 2023年 08月 08日 星期二 00:20:50 CST       |
+| start time: 2023年 08月 08日 星期二 00:29:15 CST       |
++--------------------------------------------------------+
+```
+
+读取和访问位于路径 `${DORIS_HOME}/student.csv` 的 csv 格式文件:
+
+```sql
+mysql> select * from local(
+        "file_path" = "student.csv",
+        "backend_id" = "10003",
+        "format" = "csv");
++------+---------+--------+
+| c1   | c2      | c3     |
++------+---------+--------+
+| 1    | alice   | 18     |
+| 2    | bob     | 20     |
+| 3    | jack    | 24     |
+| 4    | jackson | 19     |
+| 5    | liming  | d18    |
++------+---------+--------+
+```
+
+访问 NAS 上的共享数据:
+
+```sql
+mysql> select * from local(
+        "file_path" = "/mnt/doris/prefix_*.txt",
+        "format" = "csv",
+        "column_separator" = ",",
+        "shared_storage" = "true");
++------+------+------+
+| c1   | c2   | c3   |
++------+------+------+
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
++------+------+------+
+```
+
+可以配合 `desc function` 使用
+
+```sql
+mysql> desc function local(
+        "file_path" = "student.csv",
+        "backend_id" = "10003",
+        "format" = "csv");
++-------+------+------+-------+---------+-------+
+| Field | Type | Null | Key   | Default | Extra |
++-------+------+------+-------+---------+-------+
+| c1    | TEXT | Yes  | false | NULL    | NONE  |
+| c2    | TEXT | Yes  | false | NULL    | NONE  |
+| c3    | TEXT | Yes  | false | NULL    | NONE  |
++-------+------+------+-------+---------+-------+
+```
+
+### Keywords
+
+    local, table-valued-function, tvf
+
+### Best Practice
+
+- 关于 local tvf 的更详细使用方法可以参照 [S3](./s3.md) tvf,唯一不同的是访问存储系统的方式不一样。
+
+- 通过 local tvf 访问 NAS 上的数据
+
+  NAS 共享存储允许同时挂载到多个节点。每个节点都可以像访问本地文件一样访问共享存储中的文件。因此,可以将 NAS 视为本地文件系统,通过 local tvf 进行访问。
+
+  当设置 `"shared_storage" = "true"` 时,Doris 会认为所指定的文件可以在任意 BE 节点访问。当使用通配符指定了一组文件时,Doris 会将访问文件的请求分发到多个 BE 节点上,这样可以利用多个节点进行分布式文件扫描,提升查询性能。
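+
+- 配合 `insert into` 将 NAS 上的一批文件导入内部表(示意写法,表名 `my_table` 及各列类型均为假设,请按实际情况调整):
+
+  ```sql
+  -- 沿用上文示例中挂载在各 BE 节点的共享路径;读出的列均为 TEXT,需 cast 成目标类型
+  INSERT INTO my_table
+  SELECT cast(c1 AS INT), cast(c2 AS INT), cast(c3 AS INT)
+  FROM local(
+      "file_path" = "/mnt/doris/prefix_*.txt",
+      "format" = "csv",
+      "column_separator" = ",",
+      "shared_storage" = "true");
+  ```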
+
+
+
+
+
+
+
+
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/mv_infos.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/mv_infos.md
new file mode 100644
index 0000000000000..67c7b58ffcb9d
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/mv_infos.md
@@ -0,0 +1,102 @@
+---
+{
+    "title": "MV_INFOS",
+    "language": "zh-CN"
+}
+---
+
+
+
+## `mv_infos`
+
+### Name
+
+mv_infos
+
+### description
+
+表函数,生成异步物化视图临时表,可以查看某个 db 中创建的异步物化视图信息。
+
+该函数用于 from 子句中。
+
+该函数自 2.1.0 版本支持。
+
+#### syntax
+
+`mv_infos("database"="")`
+
+mv_infos() 表结构:
+```sql
+mysql> desc function mv_infos("database"="tpch100");
++--------------------+---------+------+-------+---------+-------+
+| Field              | Type    | Null | Key   | Default | Extra |
++--------------------+---------+------+-------+---------+-------+
+| Id                 | BIGINT  | No   | false | NULL    | NONE  |
+| Name               | TEXT    | No   | false | NULL    | NONE  |
+| JobName            | TEXT    | No   | false | NULL    | NONE  |
+| State              | TEXT    | No   | false | NULL    | NONE  |
+| SchemaChangeDetail | TEXT    | No   | false | NULL    | NONE  |
+| RefreshState       | TEXT    | No   | false | NULL    | NONE  |
+| RefreshInfo        | TEXT    | No   | false | NULL    | NONE  |
+| QuerySql           | TEXT    | No   | false | NULL    | NONE  |
+| EnvInfo            | TEXT    | No   | false | NULL    | NONE  |
+| MvProperties       | TEXT    | No   | false | NULL    | NONE  |
+| MvPartitionInfo    | TEXT    | No   | false | NULL    | NONE  |
+| SyncWithBaseTables | BOOLEAN | No   | false | NULL    | NONE  |
++--------------------+---------+------+-------+---------+-------+
+12 rows in set (0.01 sec)
+```
+
+* Id:物化视图 id
+* Name:物化视图名称
+* JobName:物化视图对应的 job 名称
+* State:物化视图状态
+* SchemaChangeDetail:物化视图 State 变为 SchemaChange 的原因
+* RefreshState:物化视图刷新状态
+* RefreshInfo:物化视图定义的刷新策略信息
+* QuerySql:物化视图定义的查询语句
+* EnvInfo:物化视图创建时的环境信息
+* MvProperties:物化视图属性
+* MvPartitionInfo:物化视图的分区信息
+* SyncWithBaseTables:是否和基表数据同步,如需查看哪个分区不同步,请使用 [SHOW PARTITIONS](../../sql-statements/Show-Statements/SHOW-PARTITIONS.md)
+
+### example
+
+1. 查看 db1 下的所有物化视图
+
+```sql
+mysql> select * from mv_infos("database"="db1");
+```
+
+2. 查看 db1 下的物化视图名称为 mv1 的物化视图
+
+```sql
+mysql> select * from mv_infos("database"="db1") where Name = "mv1";
+```
+
+3.
查看db1下的物化视图名称为mv1的状态 + +```sql +mysql> select State from mv_infos("database"="db1") where Name = "mv1"; +``` + +### keywords + + mv, infos diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/partitions.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/partitions.md new file mode 100644 index 0000000000000..ce25fc0240cd3 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/partitions.md @@ -0,0 +1,130 @@ +--- +{ + "title": "PARTITIONS", + "language": "zh-CN" +} +--- + + + +## `partitions` + +### Name + +partitions + +### Description + +表函数,生成分区临时表,可以查看某个 TABLE 的分区列表。 + +该函数用于 From 子句中。 + +该函数自 2.1.5 版本开始支持。 + +#### Syntax + +`partitions("catalog"="","database"="","table"="")` + +partitions()表结构: +```sql +mysql> desc function partitions("catalog"="internal","database"="zd","table"="user"); ++--------------------------+---------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++--------------------------+---------+------+-------+---------+-------+ +| PartitionId | BIGINT | No | false | NULL | NONE | +| PartitionName | TEXT | No | false | NULL | NONE | +| VisibleVersion | BIGINT | No | false | NULL | NONE | +| VisibleVersionTime | TEXT | No | false | NULL | NONE | +| State | TEXT | No | false | NULL | NONE | +| PartitionKey | TEXT | No | false | NULL | NONE | +| Range | TEXT | No | false | NULL | NONE | +| DistributionKey | TEXT | No | false | NULL | NONE | +| Buckets | INT | No | false | NULL | NONE | +| ReplicationNum | INT | No | false | NULL | NONE | +| StorageMedium | TEXT | No | false | NULL | NONE | +| CooldownTime | TEXT | No | false | NULL | NONE | +| RemoteStoragePolicy | TEXT | No | false | NULL | NONE | +| LastConsistencyCheckTime | TEXT | No | false | NULL | NONE | +| DataSize | TEXT | No | false | NULL | NONE | +| IsInMemory | BOOLEAN | No | false | NULL | NONE | +| ReplicaAllocation | TEXT | No | false | NULL | NONE | +| IsMutable | BOOLEAN | No | false | NULL | NONE | +| SyncWithBaseTables | BOOLEAN | No | false | NULL | NONE | +| UnsyncTables | TEXT | No | false | NULL | NONE | ++--------------------------+---------+------+-------+---------+-------+ +20 rows in set (0.02 sec) +``` + +* PartitionId:分区id +* PartitionName:分区名字 +* VisibleVersion:分区版本 +* VisibleVersionTime:分区版本提交时间 +* State:分区状态 +* PartitionKey:分区key +* Range:分区范围 +* DistributionKey:分布key +* Buckets:分桶数量 +* ReplicationNum:副本数 +* StorageMedium:存储介质 +* CooldownTime:cooldown时间 +* RemoteStoragePolicy:远程存储策略 +* LastConsistencyCheckTime:上次一致性检查时间 +* DataSize:数据大小 +* IsInMemory:是否存在内存 +* ReplicaAllocation:分布策略 +* IsMutable:是否可变 +* SyncWithBaseTables:是否和基表数据同步(针对异步物化视图的分区) +* UnsyncTables:和哪个基表数据不同步(针对异步物化视图的分区) + +```sql +mysql> desc function partitions("catalog"="hive","database"="zdtest","table"="com2"); ++-----------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-----------+------+------+-------+---------+-------+ +| Partition | TEXT | No | false | NULL | NONE | ++-----------+------+------+-------+---------+-------+ +1 row in set (0.11 sec) +``` + +* Partition:分区名字 + +### Example + +1. 查看 internal CATALOG 下 db1 的 table1 的分区列表 + +```sql +mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1"); +``` + +2. 
查看 table1 下的分区名称为 partition1 的分区信息 + +```sql +mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1"; +``` + +3. 查看 table1 下的分区名称为 partition1 的分区 id + +```sql +mysql> select PartitionId from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1"; +``` + +### Keywords + + partitions diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/query.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/query.md new file mode 100644 index 0000000000000..3e7e715db076b --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/query.md @@ -0,0 +1,110 @@ +--- +{ +"title": "QUERY", +"language": "zh-CN" +} +--- + + + +## query + +### Name + +query + +### description + +query 表函数(table-valued-function,tvf),可用于将查询语句直接透传到某个 catalog 进行数据查询 + +:::info note +Doris 2.1.3 版本开始支持,当前仅支持透传查询 jdbc catalog。 +需要先在 Doris 中创建对应的 catalog。 +::: + +#### syntax + +```sql +query( + "catalog" = "catalog_name", + "query" = "select * from db_name.table_name where condition" + ); +``` + +**参数说明** + +query表函数 tvf中的每一个参数都是一个 `"key"="value"` 对。 +相关参数: +- `catalog`: (必填) catalog名称,需要按照catalog的名称填写。 +- `query`: (必填) 需要执行的查询语句。 + +### Example + +使用 query 函数查询 jdbc 数据源中的表 + +```sql +select * from query("catalog" = "jdbc", "query" = "select * from db_name.table_name where condition"); +``` + +可以配合`desc function`使用 + +```sql +desc function query("catalog" = "jdbc", "query" = "select * from db_name.table_name where condition"); +``` + +### Keywords + + query, table-valued-function, tvf + +### Best Prac + +透传查询 jdbc catalog 数据源中的表 + +```sql +select * from query("catalog" = "jdbc", "query" = "select * from test.student"); ++------+---------+ +| id | name | ++------+---------+ +| 1 | alice | +| 2 | bob | +| 3 | jack | ++------+---------+ +select * from query("catalog" = "jdbc", "query" = "select * from test.score"); ++------+---------+ +| id | score | ++------+---------+ +| 1 | 100 | +| 2 | 90 | +| 3 | 80 | ++------+---------+ +``` + +透传关联查询 jdbc catalog 数据源中的表 + +```sql +select * from query("catalog" = "jdbc", "query" = "select a.id, a.name, b.score from test.student a join test.score b on a.id = b.id"); ++------+---------+---------+ +| id | name | score | ++------+---------+---------+ +| 1 | alice | 100 | +| 2 | bob | 90 | +| 3 | jack | 80 | ++------+---------+---------+ +``` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/s3.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/s3.md new file mode 100644 index 0000000000000..8dd80cab100ee --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/s3.md @@ -0,0 +1,564 @@ +--- +{ + "title": "S3", + "language": "zh-CN" +} +--- + + + +## S3 + +### Name + +s3 + +### description + +S3 表函数(table-valued-function,tvf),可以让用户像访问关系表格式数据一样,读取并访问 S3 兼容的对象存储上的文件内容。目前支持`csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`文件格式。 + +**语法** + +```sql +s3( + "uri" = "..", + "s3.access_key" = "...", + "s3.secret_key" = "...", + "s3.region" = "...", + "format" = "csv", + "keyn" = "valuen", + ... 
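+    -- keyn/valuen 为其余可选参数的占位写法,可用参数详见下方参数说明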
+  );
+```
+
+**参数说明**
+
+S3 TVF 中的每一个参数都是一个 `"key"="value"` 对。
+访问 S3 相关参数:
+- `uri`: (必填) 访问 S3 的 URI,S3 表函数会根据 `use_path_style` 参数来决定是否使用 Path Style 访问方式,默认为 Virtual-hosted Style 方式
+- `s3.access_key`: (必填)
+- `s3.secret_key`: (必填)
+- `s3.region`: (选填)如果 Minio 服务设置了其他的 Region,则必填;否则默认使用 `us-east-1`。
+- `s3.session_token`: (选填)
+- `use_path_style`:(选填) 默认为`false` 。S3 SDK 默认使用 Virtual-hosted Style 方式。但某些对象存储系统可能未开启或不支持 Virtual-hosted Style 方式的访问,此时我们可以添加 `use_path_style` 参数来强制使用 Path Style 方式。比如 `minio` 默认情况下只允许 `path style` 访问方式,所以在访问 MinIO 时要加上 `use_path_style=true`。
+- `force_parsing_by_standard_uri`:(选填)默认 `false` 。我们可以添加 `force_parsing_by_standard_uri` 参数来强制将非标准的 URI 解析为标准 URI。
+
+> 对于 AWS S3,标准 uri styles 有以下几种:
+> 1. AWS Client Style(Hadoop S3 Style): `s3://my-bucket/path/to/file?versionId=abc123&partNumber=77&partNumber=88`。
+> 2. Virtual Host Style:`https://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`。
+> 3. Path Style:`https://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`。
+>
+> 除了支持以上三个标准常见的 URI Styles, 还支持其他一些 URI Styles(也许不常见,但也有可能有):
+> 1. Virtual Host AWS Client (Hadoop S3) Mixed Style:
+> `s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`
+> 2. Path AWS Client (Hadoop S3) Mixed Style:
+> `s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`
+>
+> 详细使用案例可以参考最下方 Best Practice。
+
+文件格式参数:
+- `format`:(必填) 目前支持 `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`
+- `column_separator`:(选填) 列分割符,默认为`\t`。
+- `line_delimiter`:(选填) 行分割符,默认为`\n`。
+- `compress_type`: (选填) 目前支持 `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`。默认值为 `UNKNOWN`, 将会根据 `uri` 的后缀自动推断类型。
+
+下面 6 个参数是用于 JSON 格式的导入,具体使用方法可以参照:[Json Load](../../../data-operate/import/import-way/load-json-format.md)
+
+- `read_json_by_line`: (选填) 默认为 `"true"`
+- `strip_outer_array`: (选填) 默认为 `"false"`
+- `json_root`: (选填) 默认为空
+- `jsonpaths`: (选填) 默认为空
+- `num_as_string`: (选填) 默认为 `false`
+- `fuzzy_parse`: (选填) 默认为 `false`
+
+下面 2 个参数是用于 CSV 格式的导入
+
+- `trim_double_quotes`:布尔类型,选填,默认值为 `false`,为 `true` 时表示裁剪掉 CSV 文件每个字段最外层的双引号
+- `skip_lines`:整数类型,选填,默认值为 0,含义为跳过 CSV 文件的前几行。当 format 设置为 `csv_with_names` 或 `csv_with_names_and_types` 时,该参数会失效
+
+其他参数:
+- `path_partition_keys`:(选填)指定文件路径中携带的分区列名,例如 `/path/to/city=beijing/date="2023-07-09"`, 则填写 `path_partition_keys="city,date"`,将会自动从路径中读取相应列名和列值进行导入。
+- `resource`:(选填)指定 Resource 名,S3 TVF 可以利用已有的 S3 Resource 来直接访问 S3。创建 S3 Resource 的方法可以参照 [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md)。该功能自 2.1.4 版本开始支持。
+
+:::tip 注意
+直接查询 TVF 或基于该 TVF 创建 View ,需要拥有该 Resource 的 USAGE 权限,查询基于 TVF 创建的 View ,只需要该 View 的 SELECT 权限。
+:::
+
+### Example
+
+读取并访问 S3 兼容的对象存储上的 CSV 格式文件
+
+```sql
+select * from s3("uri" = "http://127.0.0.1:9312/test2/student1.csv",
+                "s3.access_key"= "minioadmin",
+                "s3.secret_key" = "minioadmin",
+                "format" = "csv",
+                "use_path_style" = "true") order by c1;
+```
+
+
+可以配合 `desc function` 使用
+
+```sql
+MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.csv",
+                "s3.access_key"= "minioadmin",
+                "s3.secret_key" = "minioadmin",
+                "format" = "csv",
+                "use_path_style" = "true");
+```
+
+### Keywords
+
+    S3, table-valued-function, TVF
+
+### Best Practice
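+
+**使用 `path_partition_keys` 读取分区路径中的列(假设性示例)**
+
+以下示例中的 bucket、路径与 AK/SK 均为占位符,仅作演示:当文件按 `city=xxx/date=xxx` 形式组织时,可通过上文的 `path_partition_keys` 参数从路径中自动解析出相应的列名和列值:
+
+```sql
+select * from s3(
+    "uri" = "http://127.0.0.1:9312/test2/city=beijing/date=2023-07-09/data.csv",
+    "s3.access_key" = "minioadmin",
+    "s3.secret_key" = "minioadmin",
+    "format" = "csv",
+    "path_partition_keys" = "city,date",
+    "use_path_style" = "true");
+```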
`"use_path_style"="true"`, 所以将采用 Path Style 的方式访问 S3。 +select * from s3( + "uri" = "https://endpoint/bucket/file/student.csv", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="true"); + +// 注意 URI Bucket写法以及use_path_style参数设置,http同理。 +// 由于设置了 `"use_path_style"="false"`, 所以将采用 Virtual-hosted Style 方式访问 S3。 +select * from s3( + "uri" = "https://bucket.endpoint/bucket/file/student.csv", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="false"); + +// 阿里云 OSS 和腾讯云 COS 采用 Virtual-hosted Style 方式访问 S3。 +// OSS +select * from s3( + "uri" = "http://example-bucket.oss-cn-beijing.aliyuncs.com/your-folder/file.parquet", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "s3.region" = "oss-cn-beijing", + "format" = "parquet", + "use_path_style" = "false"); +// COS +select * from s3( + "uri" = "https://example-bucket.cos.ap-hongkong.myqcloud.com/your-folder/file.parquet", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "s3.region" = "ap-hongkong", + "format" = "parquet", + "use_path_style" = "false"); + +// MinIO +select * from s3( + "uri" = "s3://bucket/file.csv", + "s3.endpoint" = "http://172.21.0.101:9000", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "s3.region" = "us-east-1", + "format" = "csv" +); + +// 百度云 BOS 采用兼容 S3 协议的 Virtual-hosted Style 方式访问 S3。 +// BOS +select * from s3( + "uri" = "https://example-bucket.s3.bj.bcebos.com/your-folder/file.parquet", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "s3.region" = "bj", + "format" = "parquet", + "use_path_style" = "false"); +``` + +s3:// 使用示例: + +```sql +// 注意 URI Bucket 写法, 无需设置 `use_path_style` 参数。 +// 将采用 Virtual-hosted Style 方式访问 S3。 +select * from s3( + "uri" = "s3://bucket/file/student.csv", + "s3.endpoint"= "endpont", + "s3.region"= "region", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv"); +``` + +其它支持的 URI 风格示例: + +```sql +// Virtual Host AWS Client (Hadoop S3) Mixed Style。通过设置 `use_path_style = false` 以及 `force_parsing_by_standard_uri = true` 来使用。 +select * from s3( + "URI" = "s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="false", + "force_parsing_by_standard_uri"="true"); + +// Path AWS Client (Hadoop S3) Mixed Style。通过设置 `use_path_style = true` 以及 `force_parsing_by_standard_uri = true` 来使用。 +select * from s3( + "URI" = "s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="true", + "force_parsing_by_standard_uri"="true"); +``` + + +**CSV format** +由于 S3 table-valued-function 事先并不知道 Table Schema,所以会先读一遍文件来解析出 Table Schema。 + +`csv` 格式:S3 table-valued-function 读取 S3 上的文件并当作 CSV 文件来处理,读取文件中的第一行用于解析 Table Schema。文件第一行的列个数 `n` 将作为 Table Schema 的列个数,Table Schema 的列名则自动取名为 `c1, c2, ..., cn` ,列类型都设置为 `String`, 举例: + +student1.csv 文件内容为: + +``` +1,ftw,12 +2,zs,18 +3,ww,20 +``` + +使用 S3 TVF + +```sql +MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv", +-> "use_path_style" = "true") order by c1; ++------+------+------+ +| c1 | c2 | c3 | ++------+------+------+ +| 1 | ftw | 12 | +| 2 | zs | 18 | +| 3 | ww | 20 | ++------+------+------+ +``` + +可以配合 `desc function S3()` 来查看 Table Schema + +```sql +MySQL 
+
+可以配合 `desc function S3()` 来查看 Table Schema
+
+```sql
+MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.csv",
+->                 "s3.access_key"= "minioadmin",
+->                 "s3.secret_key" = "minioadmin",
+->                 "format" = "csv",
+->                 "use_path_style" = "true");
++-------+------+------+-------+---------+-------+
+| Field | Type | Null | Key   | Default | Extra |
++-------+------+------+-------+---------+-------+
+| c1    | TEXT | Yes  | false | NULL    | NONE  |
+| c2    | TEXT | Yes  | false | NULL    | NONE  |
+| c3    | TEXT | Yes  | false | NULL    | NONE  |
++-------+------+------+-------+---------+-------+
+```
+
+**csv_with_names format**
+`csv_with_names` 格式:解析文件的第一行作为 Table Schema 的列个数和列名,列类型则都设置为 `String`, 举例:
+
+student_with_names.csv 文件内容为
+
+```
+id,name,age
+1,ftw,12
+2,zs,18
+3,ww,20
+```
+
+使用 S3 tvf
+
+```sql
+MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student_with_names.csv",
+->                 "s3.access_key"= "minioadmin",
+->                 "s3.secret_key" = "minioadmin",
+->                 "format" = "csv_with_names",
+->                 "use_path_style" = "true") order by id;
++------+------+------+
+| id   | name | age  |
++------+------+------+
+| 1    | ftw  | 12   |
+| 2    | zs   | 18   |
+| 3    | ww   | 20   |
++------+------+------+
+```
+
+同样配合 `desc function S3()` 可查看 Table Schema
+
+```sql
+MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student_with_names.csv",
+->                 "s3.access_key"= "minioadmin",
+->                 "s3.secret_key" = "minioadmin",
+->                 "format" = "csv_with_names",
+->                 "use_path_style" = "true");
++-------+------+------+-------+---------+-------+
+| Field | Type | Null | Key   | Default | Extra |
++-------+------+------+-------+---------+-------+
+| id    | TEXT | Yes  | false | NULL    | NONE  |
+| name  | TEXT | Yes  | false | NULL    | NONE  |
+| age   | TEXT | Yes  | false | NULL    | NONE  |
++-------+------+------+-------+---------+-------+
+```
+
+**csv_with_names_and_types format**
+
+`csv_with_names_and_types` 格式:目前暂不支持从 CSV 文件中解析出 Column Type。使用该 Format 时,S3 TVF 会解析文件的第一行作为 Table Schema 的列个数和列名,列类型则都设置为 String,同时将忽略该文件的第二行。
+
+`student_with_names_and_types.csv` 文件内容为
+
+```
+id,name,age
+INT,STRING,INT
+1,ftw,12
+2,zs,18
+3,ww,20
+```
+
+使用 S3 TVF
+
+```sql
+MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student_with_names_and_types.csv",
+->                 "s3.access_key"= "minioadmin",
+->                 "s3.secret_key" = "minioadmin",
+->                 "format" = "csv_with_names_and_types",
+->                 "use_path_style" = "true") order by id;
++------+------+------+
+| id   | name | age  |
++------+------+------+
+| 1    | ftw  | 12   |
+| 2    | zs   | 18   |
+| 3    | ww   | 20   |
++------+------+------+
+```
+
+同样配合 `desc function S3()` 可查看 Table Schema
+
+```sql
+MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student_with_names_and_types.csv",
+->                 "s3.access_key"= "minioadmin",
+->                 "s3.secret_key" = "minioadmin",
+->                 "format" = "csv_with_names_and_types",
+->                 "use_path_style" = "true");
++-------+------+------+-------+---------+-------+
+| Field | Type | Null | Key   | Default | Extra |
++-------+------+------+-------+---------+-------+
+| id    | TEXT | Yes  | false | NULL    | NONE  |
+| name  | TEXT | Yes  | false | NULL    | NONE  |
+| age   | TEXT | Yes  | false | NULL    | NONE  |
++-------+------+------+-------+---------+-------+
+```
+
+**JSON format**
+
+`json` 格式:JSON 格式涉及到较多的可选参数,各个参数的意义可以参考:[Json Load](../../../data-operate/import/import-way/load-json-format.md)。S3 TVF 查询 JSON 格式文件时根据 `json_root` 和 `jsonpaths` 参数定位到一个 JSON 对象,将该对象中的 `key` 作为 Table Schema 的列名,列类型都设置为 String。举例:
+
+data.json 文件
+
+```
+[{"id":1, "name":"ftw", "age":18}]
+[{"id":2, "name":"xxx", "age":17}]
+[{"id":3, "name":"yyy", "age":19}]
+```
+
+使用 S3 
TVF 查询 + +```sql +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/data.json", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "json", + "strip_outer_array" = "true", + "read_json_by_line" = "true", + "use_path_style"="true"); ++------+------+------+ +| id | name | age | ++------+------+------+ +| 1 | ftw | 18 | +| 2 | xxx | 17 | +| 3 | yyy | 19 | ++------+------+------+ + +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/data.json", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "json", + "strip_outer_array" = "true", + "jsonpaths" = "[\"$.id\", \"$.age\"]", + "use_path_style"="true"); ++------+------+ +| id | age | ++------+------+ +| 1 | 18 | +| 2 | 17 | +| 3 | 19 | ++------+------+ +``` + +**Parquet format** + +`parquet` 格式:S3 TVF 支持从 Parquet 文件中解析出 Table Schema 的列名、列类型。举例: + +```sql +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/test.snappy.parquet", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "parquet", + "use_path_style"="true") limit 5; ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| p_partkey | p_name | p_mfgr | p_brand | p_type | p_size | p_container | p_retailprice | p_comment | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| 1 | goldenrod lavender spring chocolate lace | Manufacturer#1 | Brand#13 | PROMO BURNISHED COPPER | 7 | JUMBO PKG | 901 | ly. slyly ironi | +| 2 | blush thistle blue yellow saddle | Manufacturer#1 | Brand#13 | LARGE BRUSHED BRASS | 1 | LG CASE | 902 | lar accounts amo | +| 3 | spring green yellow purple cornsilk | Manufacturer#4 | Brand#42 | STANDARD POLISHED BRASS | 21 | WRAP CASE | 903 | egular deposits hag | +| 4 | cornflower chocolate smoke green pink | Manufacturer#3 | Brand#34 | SMALL PLATED BRASS | 14 | MED DRUM | 904 | p furiously r | +| 5 | forest brown coral puff cream | Manufacturer#3 | Brand#32 | STANDARD POLISHED TIN | 15 | SM PKG | 905 | wake carefully | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +``` + +```sql +MySQL [(none)]> desc function s3( + "uri" = "http://127.0.0.1:9312/test2/test.snappy.parquet", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "parquet", + "use_path_style"="true"); ++---------------+--------------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++---------------+--------------+------+-------+---------+-------+ +| p_partkey | INT | Yes | false | NULL | NONE | +| p_name | TEXT | Yes | false | NULL | NONE | +| p_mfgr | TEXT | Yes | false | NULL | NONE | +| p_brand | TEXT | Yes | false | NULL | NONE | +| p_type | TEXT | Yes | false | NULL | NONE | +| p_size | INT | Yes | false | NULL | NONE | +| p_container | TEXT | Yes | false | NULL | NONE | +| p_retailprice | DECIMAL(9,0) | Yes | false | NULL | NONE | +| p_comment | TEXT | Yes | false | NULL | NONE | ++---------------+--------------+------+-------+---------+-------+ +``` + +**orc format** + +`orc` 格式:和 `parquet` format 使用方法一致,将 `format` 参数设置为 `orc`。 + +```sql +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/test.snappy.orc", + 
"s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "orc", + "use_path_style"="true") limit 5; ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| p_partkey | p_name | p_mfgr | p_brand | p_type | p_size | p_container | p_retailprice | p_comment | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| 1 | goldenrod lavender spring chocolate lace | Manufacturer#1 | Brand#13 | PROMO BURNISHED COPPER | 7 | JUMBO PKG | 901 | ly. slyly ironi | +| 2 | blush thistle blue yellow saddle | Manufacturer#1 | Brand#13 | LARGE BRUSHED BRASS | 1 | LG CASE | 902 | lar accounts amo | +| 3 | spring green yellow purple cornsilk | Manufacturer#4 | Brand#42 | STANDARD POLISHED BRASS | 21 | WRAP CASE | 903 | egular deposits hag | +| 4 | cornflower chocolate smoke green pink | Manufacturer#3 | Brand#34 | SMALL PLATED BRASS | 14 | MED DRUM | 904 | p furiously r | +| 5 | forest brown coral puff cream | Manufacturer#3 | Brand#32 | STANDARD POLISHED TIN | 15 | SM PKG | 905 | wake carefully | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +``` +**avro format** + +`avro` 格式:S3 TVF 支持从 avro 文件中解析出 Table Schema 的列名、列类型。举例: + +```sql +select * from s3( + "uri" = "http://127.0.0.1:9312/test2/person.avro", + "ACCESS_KEY" = "ak", + "SECRET_KEY" = "sk", + "FORMAT" = "avro"); ++--------+--------------+-------------+-----------------+ +| name | boolean_type | double_type | long_type | ++--------+--------------+-------------+-----------------+ +| Alyssa | 1 | 10.0012 | 100000000221133 | +| Ben | 0 | 5555.999 | 4009990000 | +| lisi | 0 | 5992225.999 | 9099933330 | ++--------+--------------+-------------+-----------------+ +``` + +**URI 包含通配符** + +URI 可以使用通配符来读取多个文件。注意:如果使用通配符要保证各个文件的格式是一致的 (尤其是 `csv`/`csv_with_names`/`csv_with_names_and_types` 算做不同的格式),S3 TVF 用第一个文件来解析出 Table Schema。 +如下两个 CSV 文件: + +``` +// file1.csv +1,aaa,18 +2,qqq,20 +3,qwe,19 + +// file2.csv +5,cyx,19 +6,ftw,21 +``` + +可以在 URI 上使用通配符来导入。 + +```sql +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/file*.csv", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "csv", + "use_path_style"="true"); ++------+------+------+ +| c1 | c2 | c3 | ++------+------+------+ +| 1 | aaa | 18 | +| 2 | qqq | 20 | +| 3 | qwe | 19 | +| 5 | cyx | 19 | +| 6 | ftw | 21 | ++------+------+------+ +``` + +**配合 `insert into` 和 `cast` 使用 `S3` TVF** + +```sql +// 创建 Doris 内部表 +CREATE TABLE IF NOT EXISTS ${testTable} + ( + id int, + name varchar(50), + age int + ) + COMMENT "my first table" + DISTRIBUTED BY HASH(id) BUCKETS 32 + PROPERTIES("replication_num" = "1"); + +// 使用 S3 插入数据 +insert into ${testTable} (id,name,age) +select cast (id as INT) as id, name, cast (age as INT) as age +from s3( + "uri" = "${uri}", + "s3.access_key"= "${ak}", + "s3.secret_key" = "${sk}", + "format" = "${format}", + "strip_outer_array" = "true", + "read_json_by_line" = "true", + "use_path_style" = "true"); +``` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/tasks.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/tasks.md new file mode 100644 index 
0000000000000..5cdeac64e21dc --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/tasks.md @@ -0,0 +1,168 @@ +--- +{ + "title": "TASKS", + "language": "zh-CN" +} +--- + + + +## `tasks` + +### Name + +tasks + +### description + +表函数,生成 tasks 临时表,可以查看当前 doris 集群中的 job 产生的 tasks 信息。 + +该函数用于 from 子句中。 + +该函数自 2.1.0 版本支持。 + +#### syntax + +`tasks("type"="insert");` +**参数说明** + +| 参数名 | 说明 | 类型 | 是否必填 | +|:-----|:-------|:-------|:-----| +| type | 作业类型 | string | 是 | + +type 支持的类型: + +- insert:insert into 类型的任务。 +- mv: mv 类型的任务 + +##### Insert tasks +`tasks("type"="insert");`表结构: +``` +mysql> desc function tasks("type"="insert");; ++---------------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++---------------+------+------+-------+---------+-------+ +| TaskId | TEXT | No | false | NULL | NONE | +| JobId | TEXT | No | false | NULL | NONE | +| JobName | TEXT | No | false | NULL | NONE | +| Label | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| ErrorMsg | TEXT | No | false | NULL | NONE | +| CreateTime | TEXT | No | false | NULL | NONE | +| FinishTime | TEXT | No | false | NULL | NONE | +| TrackingUrl | TEXT | No | false | NULL | NONE | +| LoadStatistic | TEXT | No | false | NULL | NONE | +| User | TEXT | No | false | NULL | NONE | ++---------------+------+------+-------+---------+-------+ +11 row in set (0.01 sec) +``` +- TaskId:task id +- JobId:job id +- JobName:job 名称 +- Label:label +- Status:task 状态 +- ErrorMsg:task 失败信息 +- CreateTime:task 创建时间 +- FinishTime:task 结束时间 +- TrackingUrl:task tracking url +- LoadStatistic:task 统计信息 +- User:执行用户 +##### MV tasks +```sql +mysql> desc function tasks("type"="mv"); ++-----------------------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-----------------------+------+------+-------+---------+-------+ +| TaskId | TEXT | No | false | NULL | NONE | +| JobId | TEXT | No | false | NULL | NONE | +| JobName | TEXT | No | false | NULL | NONE | +| MvId | TEXT | No | false | NULL | NONE | +| MvName | TEXT | No | false | NULL | NONE | +| MvDatabaseId | TEXT | No | false | NULL | NONE | +| MvDatabaseName | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| ErrorMsg | TEXT | No | false | NULL | NONE | +| CreateTime | TEXT | No | false | NULL | NONE | +| StartTime | TEXT | No | false | NULL | NONE | +| FinishTime | TEXT | No | false | NULL | NONE | +| DurationMs | TEXT | No | false | NULL | NONE | +| TaskContext | TEXT | No | false | NULL | NONE | +| RefreshMode | TEXT | No | false | NULL | NONE | +| NeedRefreshPartitions | TEXT | No | false | NULL | NONE | +| CompletedPartitions | TEXT | No | false | NULL | NONE | +| Progress | TEXT | No | false | NULL | NONE | ++-----------------------+------+------+-------+---------+-------+ +18 rows in set (0.00 sec) +``` +* TaskId:task id +* JobId:job id +* JobName:job 名称 +* MvId:物化视图 id +* MvName:物化视图名称 +* MvDatabaseId:物化视图所属 db id +* MvDatabaseName:物化视图所属 db 名称 +* Status:task 状态 +* ErrorMsg:task 失败信息 +* CreateTime:task 创建时间 +* StartTime:task 开始运行时间 +* FinishTime:task 结束运行时间 +* DurationMs:task 运行时间 +* TaskContext:task 运行参数 +* RefreshMode:刷新模式 +* NeedRefreshPartitions:本次 task 需要刷新的分区信息 +* CompletedPartitions:本次 task 刷新完成的分区信息 +* Progress:task 运行进度 + +### example +#### Insert Tasks +``` +mysql> select * from tasks("type"="insert") limit 1 \G +*************************** 1. 
row ***************************
+       TaskId: 667704038678903
+        JobId: 10069
+        Label: 10069_667704038678903
+       Status: FINISHED
+      EtlInfo: \N
+     TaskInfo: cluster:N/A; timeout(s):14400; max_filter_ratio:0.0; priority:NORMAL
+     ErrorMsg: \N
+ CreateTimeMs: 2023-12-08 16:46:57
+ FinishTimeMs: 2023-12-08 16:46:57
+  TrackingUrl:
+LoadStatistic: {"Unfinished backends":{},"ScannedRows":0,"TaskNumber":0,"LoadBytes":0,"All backends":{},"FileNumber":0,"FileSize":0}
+         User: root
+1 row in set (0.05 sec)
+
+```
+#### MV Tasks
+1. 查看所有物化视图的 task
+
+```sql
+mysql> select * from tasks("type"="mv");
+```
+
+2. 查看 jobName 为`inner_mtmv_75043`的所有 task
+
+```sql
+mysql> select * from tasks("type"="mv") where JobName="inner_mtmv_75043";
+```
+
+### keywords
+
+    tasks, job, insert, mv, materialized view
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/workload-group.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/workload-group.md
new file mode 100644
index 0000000000000..28834e806fecc
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/sql-manual/sql-functions/table-valued-functions/workload-group.md
@@ -0,0 +1,72 @@
+---
+{
+    "title": "WORKLOAD_GROUPS",
+    "language": "zh-CN"
+}
+---
+
+
+
+## `workload_groups`
+
+### Name
+
+workload_groups
+
+:::caution
+已废弃。自 2.1.1 起,此表函数移到 information_schema.workload_groups 表。
+:::
+
+### description
+
+表函数,生成 workload_groups 临时表,可以查看当前用户具有权限的资源组信息。
+
+该函数用于 from 子句中。
+
+#### syntax
+`workload_groups()`
+
+workload_groups()表结构:
+```
+mysql> desc function workload_groups();
++-------+--------+------+-------+---------+-------+
+| Field | Type   | Null | Key   | Default | Extra |
++-------+--------+------+-------+---------+-------+
+| Id    | BIGINT | No   | false | NULL    | NONE  |
+| Name  | STRING | No   | false | NULL    | NONE  |
+| Item  | STRING | No   | false | NULL    | NONE  |
+| Value | STRING | No   | false | NULL    | NONE  |
++-------+--------+------+-------+---------+-------+
+```
+
+### example
+```
+mysql> select * from workload_groups();
++-------+--------+--------------+-------+
+| Id    | Name   | Item         | Value |
++-------+--------+--------------+-------+
+| 11001 | normal | memory_limit | 100%  |
+| 11001 | normal | cpu_share    | 10    |
++-------+--------+--------------+-------+
+```
+
+### keywords
+
+    workload_groups
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0.json b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0.json
index e5a9624761060..4e06e794aa92b 100644
--- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0.json
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0.json
@@ -311,6 +311,10 @@
     "message": "表函数",
     "description": "The label for category Table Functions in sidebar docs"
   },
+  "sidebar.docs.category.Table Valued Functions": {
+    "message": "表值函数",
+    "description": "The label for category Table Valued Functions in sidebar docs"
+  },
   "sidebar.docs.category.Analytic(Window) Functions": {
     "message": "分析(窗口)函数",
     "description": "The label for category Analytic(Window) Functions in sidebar docs"
@@ -423,4 +427,4 @@
     "message": "使用教程",
     "description": "The label for category BI and Database IDE in sidebar docs"
   }
-}
\ No newline at end of file
+}
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/backends.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/backends.md
new file mode 
100644 index 0000000000000..b87bbfe36aa7f --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/backends.md @@ -0,0 +1,111 @@ +--- +{ + "title": "BACKENDS", + "language": "zh-CN" +} +--- + + + +## `backends` + +### Name + +backends + +### description + +表函数,生成 backends 临时表,可以查看当前 doris 集群中的 BE 节点信息。 + +该函数用于 from 子句中。 + +#### syntax +`backends()` + +backends() 表结构: +``` +mysql> desc function backends(); ++-------------------------+---------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------------+---------+------+-------+---------+-------+ +| BackendId | BIGINT | No | false | NULL | NONE | +| Host | TEXT | No | false | NULL | NONE | +| HeartbeatPort | INT | No | false | NULL | NONE | +| BePort | INT | No | false | NULL | NONE | +| HttpPort | INT | No | false | NULL | NONE | +| BrpcPort | INT | No | false | NULL | NONE | +| LastStartTime | TEXT | No | false | NULL | NONE | +| LastHeartbeat | TEXT | No | false | NULL | NONE | +| Alive | BOOLEAN | No | false | NULL | NONE | +| SystemDecommissioned | BOOLEAN | No | false | NULL | NONE | +| TabletNum | BIGINT | No | false | NULL | NONE | +| DataUsedCapacity | BIGINT | No | false | NULL | NONE | +| AvailCapacity | BIGINT | No | false | NULL | NONE | +| TotalCapacity | BIGINT | No | false | NULL | NONE | +| UsedPct | DOUBLE | No | false | NULL | NONE | +| MaxDiskUsedPct | DOUBLE | No | false | NULL | NONE | +| RemoteUsedCapacity | BIGINT | No | false | NULL | NONE | +| Tag | TEXT | No | false | NULL | NONE | +| ErrMsg | TEXT | No | false | NULL | NONE | +| Version | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| HeartbeatFailureCounter | INT | No | false | NULL | NONE | +| NodeRole | TEXT | No | false | NULL | NONE | ++-------------------------+---------+------+-------+---------+-------+ +23 rows in set (0.002 sec) +``` + +`backends()` tvf 展示出来的信息基本与 `show backends` 语句展示出的信息一致,但是 `backends()` tvf 的各个字段类型更加明确,且可以利用 tvf 生成的表去做过滤、join 等操作。 + +对 `backends()` tvf 信息展示进行了鉴权,与 `show backends` 行为保持一致,要求用户具有 ADMIN/OPERATOR 权限。 + +### example +``` +mysql> select * from backends()\G +*************************** 1. 
row *************************** + BackendId: 10002 + Host: 10.xx.xx.90 + HeartbeatPort: 9053 + BePort: 9063 + HttpPort: 8043 + BrpcPort: 8069 + LastStartTime: 2023-06-15 16:51:02 + LastHeartbeat: 2023-06-15 17:09:58 + Alive: 1 + SystemDecommissioned: 0 + TabletNum: 21 + DataUsedCapacity: 0 + AvailCapacity: 5187141550081 + TotalCapacity: 7750977622016 + UsedPct: 33.077583202570978 + MaxDiskUsedPct: 33.077583202583881 + RemoteUsedCapacity: 0 + Tag: {"location" : "default"} + ErrMsg: + Version: doris-0.0.0-trunk-4b18cde0c7 + Status: {"lastSuccessReportTabletsTime":"2023-06-15 17:09:02","lastStreamLoadTime":-1,"isQueryDisabled":false,"isLoadDisabled":false} +HeartbeatFailureCounter: 0 + NodeRole: mix +1 row in set (0.038 sec) +``` + +### keywords + + backends \ No newline at end of file diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/catalogs.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/catalogs.md new file mode 100644 index 0000000000000..c7c5c964c0433 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/catalogs.md @@ -0,0 +1,92 @@ +--- +{ + "title": "CATALOGS", + "language": "zh-CN" +} +--- + + + +## `catalogs` + +### Name + + +catalogs + + +### description + +表函数,生成 catalogs 临时表,可以查看当前doris中的创建的 catalogs 信息。 + +该函数用于 from 子句中。 + +#### syntax + +`catalogs()` + +catalogs()表结构: +``` +mysql> desc function catalogs(); ++-------------+--------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------+--------+------+-------+---------+-------+ +| CatalogId | BIGINT | No | false | NULL | NONE | +| CatalogName | TEXT | No | false | NULL | NONE | +| CatalogType | TEXT | No | false | NULL | NONE | +| Property | TEXT | No | false | NULL | NONE | +| Value | TEXT | No | false | NULL | NONE | ++-------------+--------+------+-------+---------+-------+ +5 rows in set (0.04 sec) +``` + +`catalogs()` tvf展示的信息是综合了 `show catalogs` 与 `show catalog xxx` 语句的结果。 + +可以利用tvf生成的表去做过滤、join等操作。 + + + +### example + +``` +mysql> select * from catalogs(); ++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+ +| CatalogId | CatalogName | CatalogType | Property | Value | ++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+ +| 16725 | hive | hms | dfs.client.failover.proxy.provider.HANN | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider | +| 16725 | hive | hms | dfs.ha.namenodes.HANN | nn1,nn2 | +| 16725 | hive | hms | create_time | 2023-07-13 16:24:38.968 | +| 16725 | hive | hms | ipc.client.fallback-to-simple-auth-allowed | true | +| 16725 | hive | hms | dfs.namenode.rpc-address.HANN.nn1 | nn1_host:rpc_port | +| 16725 | hive | hms | hive.metastore.uris | thrift://127.0.0.1:7004 | +| 16725 | hive | hms | dfs.namenode.rpc-address.HANN.nn2 | nn2_host:rpc_port | +| 16725 | hive | hms | type | hms | +| 16725 | hive | hms | dfs.nameservices | HANN | +| 0 | internal | internal | NULL | NULL | +| 16726 | es | es | create_time | 2023-07-13 16:24:44.922 | +| 16726 | es | es | type | es | +| 16726 | es | es | hosts | http://127.0.0.1:9200 | 
++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+ +13 rows in set (0.01 sec) +``` + +### keywords + + catalogs diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/frontends.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/frontends.md new file mode 100644 index 0000000000000..463fd80f52828 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/frontends.md @@ -0,0 +1,101 @@ +--- +{ + "title": "FRONTENDS", + "language": "zh-CN" +} +--- + + + +## `frontends` + +### Name + +frontends + +### description + +表函数,生成 frontends 临时表,可以查看当前 doris 集群中的 FE 节点信息。 + +该函数用于 from 子句中。 + +#### syntax +`frontends()` + +frontends() 表结构: +``` +mysql> desc function frontends(); ++-------------------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------+------+------+-------+---------+-------+ +| Name | TEXT | No | false | NULL | NONE | +| Host | TEXT | No | false | NULL | NONE | +| EditLogPort | TEXT | No | false | NULL | NONE | +| HttpPort | TEXT | No | false | NULL | NONE | +| QueryPort | TEXT | No | false | NULL | NONE | +| RpcPort | TEXT | No | false | NULL | NONE | +| ArrowFlightSqlPort| TEXT | No | false | NULL | NONE | +| Role | TEXT | No | false | NULL | NONE | +| IsMaster | TEXT | No | false | NULL | NONE | +| ClusterId | TEXT | No | false | NULL | NONE | +| Join | TEXT | No | false | NULL | NONE | +| Alive | TEXT | No | false | NULL | NONE | +| ReplayedJournalId | TEXT | No | false | NULL | NONE | +| LastHeartbeat | TEXT | No | false | NULL | NONE | +| IsHelper | TEXT | No | false | NULL | NONE | +| ErrMsg | TEXT | No | false | NULL | NONE | +| Version | TEXT | No | false | NULL | NONE | +| CurrentConnected | TEXT | No | false | NULL | NONE | ++-------------------+------+------+-------+---------+-------+ +17 rows in set (0.022 sec) +``` + +`frontends()` tvf 展示出来的信息基本与 `show frontends` 语句展示出的信息一致,但是 `frontends()` tvf 的各个字段类型更加明确,且可以利用 tvf 生成的表去做过滤、join 等操作。 + +对 `frontends()` tvf 信息展示进行了鉴权,与 `show frontends` 行为保持一致,要求用户具有 ADMIN/OPERATOR 权限。 + +### example +``` +mysql> select * from frontends()\G +*************************** 1. 
row ***************************
+              Name: fe_5fa8bf19_fd6b_45cb_89c5_25a5ebc45582
+              Host: 10.xx.xx.14
+       EditLogPort: 9013
+          HttpPort: 8034
+         QueryPort: 9033
+           RpcPort: 9023
+ArrowFlightSqlPort: 9040
+              Role: FOLLOWER
+          IsMaster: true
+         ClusterId: 1258341841
+              Join: true
+             Alive: true
+ ReplayedJournalId: 186
+     LastHeartbeat: 2023-06-15 16:53:12
+          IsHelper: true
+            ErrMsg:
+           Version: doris-0.0.0-trunk-4b18cde0c7
+  CurrentConnected: Yes
+1 row in set (0.060 sec)
+```
+
+### keywords
+
+    frontends
\ No newline at end of file
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/frontends_disks.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/frontends_disks.md
new file mode 100644
index 0000000000000..835d1eed135e5
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/frontends_disks.md
@@ -0,0 +1,86 @@
+---
+{
+    "title": "frontends_disks",
+    "language": "zh-CN"
+}
+---
+
+
+
+## `frontends_disks`
+
+### Name
+
+frontends_disks
+
+### description
+
+表函数,生成 frontends_disks 临时表,可以查看当前 doris 集群中的 FE 节点的磁盘信息。
+
+该函数用于 from 子句中。
+
+#### syntax
+`frontends_disks()`
+
+frontends_disks() 表结构:
+```
+mysql> desc function frontends_disks();
++------------+------+------+-------+---------+-------+
+| Field      | Type | Null | Key   | Default | Extra |
++------------+------+------+-------+---------+-------+
+| Name       | TEXT | No   | false | NULL    | NONE  |
+| Host       | TEXT | No   | false | NULL    | NONE  |
+| DirType    | TEXT | No   | false | NULL    | NONE  |
+| Dir        | TEXT | No   | false | NULL    | NONE  |
+| Filesystem | TEXT | No   | false | NULL    | NONE  |
+| Capacity   | TEXT | No   | false | NULL    | NONE  |
+| Used       | TEXT | No   | false | NULL    | NONE  |
+| Available  | TEXT | No   | false | NULL    | NONE  |
+| UseRate    | TEXT | No   | false | NULL    | NONE  |
+| MountOn    | TEXT | No   | false | NULL    | NONE  |
++------------+------+------+-------+---------+-------+
+10 rows in set (0.14 sec)
+```
+
+`frontends_disks()` tvf 展示出来的信息基本与 `show frontends disks` 语句展示出的信息一致,但是 `frontends_disks()` tvf 的各个字段类型更加明确,且可以利用 tvf 生成的表去做过滤、join 等操作。
+
+对 `frontends_disks()` tvf 信息展示进行了鉴权,与 `show frontends disks` 行为保持一致,要求用户具有 ADMIN/OPERATOR 权限。
+
+### example
+```
+mysql> select * from frontends_disks()\G
+*************************** 1. row ***************************
+      Name: fe_fe1d5bd9_d1e5_4ccc_9b03_ca79b95c9941
+      Host: 172.XX.XX.1
+   DirType: log
+       Dir: /data/doris/fe-github/log
+Filesystem: /dev/sdc5
+  Capacity: 366G
+      Used: 119G
+ Available: 228G
+   UseRate: 35%
+   MountOn: /data
+......
+12 rows in set (0.03 sec)
+```
+
+### keywords
+
+    frontends_disks
\ No newline at end of file
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/hdfs.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/hdfs.md
new file mode 100644
index 0000000000000..d71923b184e1c
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/hdfs.md
@@ -0,0 +1,151 @@
+---
+{
+    "title": "HDFS",
+    "language": "zh-CN"
+}
+---
+
+
+
+## HDFS
+
+### Description
+
+HDFS 表函数(table-valued-function,tvf),可以让用户像访问关系表格式数据一样,读取并访问 HDFS 上的文件内容。目前支持`csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`文件格式。
+
+#### syntax
+```sql
+hdfs(
+  "uri" = "..",
+  "fs.defaultFS" = "...",
+  "hadoop.username" = "...",
+  "format" = "csv",
+  "keyn" = "valuen"
+  ...
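+  -- keyn/valuen 为其余可选参数的占位写法,可用参数详见下方参数说明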
+  );
+```
+
+**参数说明**
+
+访问 HDFS 相关参数:
+- `uri`:(必填)访问 HDFS 的 uri。如果 uri 路径不存在或文件都是空文件,HDFS TVF 将返回空集合。
+- `fs.defaultFS`:(必填)
+- `hadoop.username`:(必填)可以是任意字符串,但不能为空
+- `hadoop.security.authentication`:(选填)
+- `hadoop.kerberos.principal`:(选填)
+- `hadoop.kerberos.keytab`:(选填)
+- `dfs.client.read.shortcircuit`:(选填)
+- `dfs.domain.socket.path`:(选填)
+
+访问 HA 模式 HDFS 相关参数:
+- `dfs.nameservices`:(选填)
+- `dfs.ha.namenodes.your-nameservices`:(选填)
+- `dfs.namenode.rpc-address.your-nameservices.your-namenode`:(选填)
+- `dfs.client.failover.proxy.provider.your-nameservices`:(选填)
+
+文件格式相关参数:
+- `format`:(必填) 目前支持 `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc/avro`
+- `column_separator`:(选填) 列分割符,默认为`\t`。
+- `line_delimiter`:(选填) 行分割符,默认为`\n`。
+- `compress_type`: (选填) 目前支持 `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`。默认值为 `UNKNOWN`, 将会根据 `uri` 的后缀自动推断类型。
+
+    下面 6 个参数是用于 JSON 格式的导入,具体使用方法可以参照:[JSON Load](../../../data-operate/import/import-way/load-json-format.md)
+
+- `read_json_by_line`: (选填) 默认为 `"true"`
+- `strip_outer_array`: (选填) 默认为 `"false"`
+- `json_root`: (选填) 默认为空
+- `json_paths`: (选填) 默认为空
+- `num_as_string`: (选填) 默认为 `false`
+- `fuzzy_parse`: (选填) 默认为 `false`
+
+    下面 2 个参数用于 CSV 格式的导入:
+
+- `trim_double_quotes`:布尔类型,选填,默认值为 `false`,为 `true` 时表示裁剪掉 CSV 文件每个字段最外层的双引号
+- `skip_lines`:整数类型,选填,默认值为 0,含义为跳过 CSV 文件的前几行。当 format 设置为 `csv_with_names` 或 `csv_with_names_and_types` 时,该参数会失效
+
+其他参数:
+- `path_partition_keys`:(选填)指定文件路径中携带的分区列名,例如 `/path/to/city=beijing/date="2023-07-09"`,则填写 `path_partition_keys="city,date"`,将会自动从路径中读取相应列名和列值进行导入。
+- `resource`:(选填)指定 Resource 名,HDFS TVF 可以利用已有的 HDFS Resource 来直接访问 HDFS。创建 HDFS Resource 的方法可以参照 [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md)。该功能自 2.1.4 版本开始支持。
+
+:::tip 注意
+直接查询 TVF 或基于该 TVF 创建 View ,需要拥有该 Resource 的 USAGE 权限,查询基于 TVF 创建的 View ,只需要该 View 的 SELECT 权限
+:::
+
+### Examples
+
+读取并访问 HDFS 存储上的 CSV 格式文件
+```sql
+MySQL [(none)]> select * from hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student.csv",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "csv");
++------+---------+------+
+| c1   | c2      | c3   |
++------+---------+------+
+| 1    | alice   | 18   |
+| 2    | bob     | 20   |
+| 3    | jack    | 24   |
+| 4    | jackson | 19   |
+| 5    | liming  | 18   |
++------+---------+------+
+```
+
+读取并访问 HA 模式的 HDFS 存储上的 CSV 格式文件
+```sql
+MySQL [(none)]> select * from hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student.csv",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "csv",
+            "dfs.nameservices" = "my_hdfs",
+            "dfs.ha.namenodes.my_hdfs" = "nn1,nn2",
+            "dfs.namenode.rpc-address.my_hdfs.nn1" = "namenode01:8020",
+            "dfs.namenode.rpc-address.my_hdfs.nn2" = "namenode02:8020",
+            "dfs.client.failover.proxy.provider.my_hdfs" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
++------+---------+------+
+| c1   | c2      | c3   |
++------+---------+------+
+| 1    | alice   | 18   |
+| 2    | bob     | 20   |
+| 3    | jack    | 24   |
+| 4    | jackson | 19   |
+| 5    | liming  | 18   |
++------+---------+------+
+```
+
+可以配合 `desc function` 使用。
+
+```sql
+MySQL [(none)]> desc function hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student_with_names.csv",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "csv_with_names");
+```
+
+### Keywords
+
+    HDFS, table-valued-function, TVF
+
+### Best Practice
+
+  关于 HDFS TVF 的更详细使用方法可以参照 
[S3](./s3.md) TVF, 唯一不同的是访问存储系统的方式不一样。 diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md new file mode 100644 index 0000000000000..30cdfb54158c8 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md @@ -0,0 +1,97 @@ +--- +{ +"title": "ICEBERG_META", +"language": "zh-CN" +} +--- + + + +## iceberg_meta + +### Name + +iceberg_meta + +### description + +iceberg_meta 表函数(table-valued-function,tvf),可以用于读取 iceberg 表的各类元数据信息,如操作历史、生成的快照、文件元数据等。 + +#### syntax +```sql +iceberg_meta( + "table" = "ctl.db.tbl", + "query_type" = "snapshots" + ... + ); +``` + +**参数说明** + +iceberg_meta 表函数 tvf 中的每一个参数都是一个 `"key"="value"` 对。 +相关参数: +- `table`: (必填) 完整的表名,需要按照目录名。库名。表名的格式,填写需要查看的 iceberg 表名。 +- `query_type`: (必填) 想要查看的元数据类型,目前仅支持 snapshots。 + +### Example + +读取并访问 iceberg 表格式的 snapshots 元数据。 + +```sql +select * from iceberg_meta("table" = "ctl.db.tbl", "query_type" = "snapshots"); + +``` + +可以配合`desc function`使用 + +```sql +desc function iceberg_meta("table" = "ctl.db.tbl", "query_type" = "snapshots"); +``` + +### Keywords + + iceberg_meta, table-valued-function, tvf + +### Best Prac + +查看 iceberg 表的 snapshots + +```sql +select * from iceberg_meta("table" = "iceberg_ctl.test_db.test_tbl", "query_type" = "snapshots"); ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +| committed_at | snapshot_id | parent_id | operation | manifest_list | summary | ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +| 2022-09-20 11:14:29 | 64123452344 | -1 | append | hdfs:/path/to/m1 | {"flink.job-id":"xxm1", ...} | +| 2022-09-21 10:36:35 | 98865735822 | 64123452344 | overwrite | hdfs:/path/to/m2 | {"flink.job-id":"xxm2", ...} | +| 2022-09-21 21:44:11 | 51232845315 | 98865735822 | overwrite | hdfs:/path/to/m3 | {"flink.job-id":"xxm3", ...} | ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +``` + +根据 snapshot_id 字段筛选 + +```sql +select * from iceberg_meta("table" = "iceberg_ctl.test_db.test_tbl", "query_type" = "snapshots") +where snapshot_id = 98865735822; ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +| committed_at | snapshot_id | parent_id | operation | manifest_list | summary | ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +| 2022-09-21 10:36:35 | 98865735822 | 64123452344 | overwrite | hdfs:/path/to/m2 | {"flink.job-id":"xxm2", ...} | ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +``` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/jobs.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/jobs.md new file mode 100644 index 0000000000000..19885eae4d3d8 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/jobs.md @@ -0,0 +1,146 @@ +--- +{ + "title": "JOBS", + "language": "zh-CN" +} +--- + + + +## `jobs` + +### 
Name + +jobs + +### description + +表函数,生成任务临时表,可以查看某个任务类型中的job信息。 + +该函数用于 from 子句中。 + +该函数自 2.1.0 版本支持。 + +#### syntax + +`jobs("type"="")` + +**参数说明** + +| 参数名 | 说明 | 类型 | 是否必填 | +|:-----|:-----|:-------|:-----| +| type | 作业类型 | string | 是 | + +type 支持的类型: + +- insert:insert into 类型的任务。 +- mv:物化视图类型的任务。 +##### Insert 任务 +jobs("type"="insert")表结构: +```sql +mysql> desc function jobs("type"="insert"); ++-------------------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------+------+------+-------+---------+-------+ +| Id | TEXT | No | false | NULL | NONE | +| Name | TEXT | No | false | NULL | NONE | +| Definer | TEXT | No | false | NULL | NONE | +| ExecuteType | TEXT | No | false | NULL | NONE | +| RecurringStrategy | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| ExecuteSql | TEXT | No | false | NULL | NONE | +| CreateTime | TEXT | No | false | NULL | NONE | +| SucceedTaskCount | TEXT | No | false | NULL | NONE | +| FailedTaskCount | TEXT | No | false | NULL | NONE | +| CanceledTaskCount | TEXT | No | false | NULL | NONE | +| Comment | TEXT | No | false | NULL | NONE | ++-------------------+------+------+-------+---------+-------+ +12 rows in set (0.01 sec) +``` +* Id:job id. +* Name:job名称. +* Definer:job定义者. +* ExecuteType:执行类型 +* RecurringStrategy:循环策略 +* Status:job状态 +* ExecuteSql:执行SQL +* CreateTime:job 创建时间 +* SucceedTaskCount:成功任务数量 +* FailedTaskCount:失败任务数量 +* CanceledTaskCount:取消任务数量 +* Comment:job 注释 +##### 物化视图任务 +jobs("type"="mv")表结构: +```sql +mysql> desc function jobs("type"="mv"); ++-------------------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------+------+------+-------+---------+-------+ +| Id | TEXT | No | false | NULL | NONE | +| Name | TEXT | No | false | NULL | NONE | +| MvId | TEXT | No | false | NULL | NONE | +| MvName | TEXT | No | false | NULL | NONE | +| MvDatabaseId | TEXT | No | false | NULL | NONE | +| MvDatabaseName | TEXT | No | false | NULL | NONE | +| ExecuteType | TEXT | No | false | NULL | NONE | +| RecurringStrategy | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| CreateTime | TEXT | No | false | NULL | NONE | ++-------------------+------+------+-------+---------+-------+ +10 rows in set (0.00 sec) +``` + +* Id:job id. +* Name:job名称. +* MvId:物化视图id +* MvName:物化视图名称 +* MvDatabaseId:物化视图所属db id +* MvDatabaseName:物化视图所属db名称 +* ExecuteType:执行类型 +* RecurringStrategy:循环策略 +* Status:job状态 +* CreateTime:task创建时间 + +### example + +1. 查看所有物化视图的job + +```sql +mysql> select * from jobs("type"="mv"); +``` + +2. 查看 name 为`inner_mtmv_75043`的 job + +```sql +mysql> select * from jobs("type"="mv") where Name="inner_mtmv_75043"; +``` +3. 查看所有 insert 任务 + +```sql +mysql> select * from jobs("type"="insert"); +``` +4. 
查看 name 为`one_insert_job`的 job + +```sql +mysql> select * from jobs("type"="insert") where Name='one_insert_job'; +``` +### keywords + + jobs, job, insert, mv, materialized view, schedule diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/local.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/local.md new file mode 100644 index 0000000000000..7a3d945fe69cb --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/local.md @@ -0,0 +1,192 @@ +--- +{ + "title": "LOCAL", + "language": "zh-CN" +} +--- + + + +## local + +### Name + +local + +### Description + +Local表函数(table-valued-function,tvf),可以让用户像访问关系表格式数据一样,读取并访问 be 上的文件内容。目前支持`csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`文件格式。 + +该函数需要 ADMIN 权限。 + +#### syntax + +```sql +local( + "file_path" = "path/to/file.txt", + "backend_id" = "be_id", + "format" = "csv", + "keyn" = "valuen" + ... + ); +``` + +**参数说明** + +- 访问local文件的相关参数: + + - `file_path` + + (必填)待读取文件的路径,该路径是一个相对于 `user_files_secure_path` 目录的相对路径, 其中 `user_files_secure_path` 参数是 [be的一个配置项](../../../admin-manual/config/be-config.md) 。 + + 路径中不能包含 `..`,可以使用 glob 语法进行模糊匹配,如:`logs/*.log` + +- 执行方式相关: + + 在 2.1.1 之前的版本中,Doris 仅支持指定某一个 BE 节点,读取该节点上的本地数据文件。 + + - `backend_id`: + + 文件所在的 be id。 `backend_id` 可以通过 `show backends` 命令得到。 + + 从 2.1.2 版本开始,Doris 增加了新的参数 `shared_storage`。 + + - `shared_storage` + + 默认为 false。如果为 true,表示指定的文件存在于共享存储上(比如 NAS)。共享存储必须兼容 POXIS 文件接口,并且同时挂载在所有 BE 节点上。 + + 当 `shared_storage` 为 true 时,可以不设置 `backend_id`,Doris 可能会利用到所有 BE 节点进行数据访问。如果设置了 `backend_id`,则仍然仅在指定 BE 节点上执行。 + +- 文件格式相关参数: + + - `format`:(必填) 目前支持 `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` + - `column_separator`:(选填) 列分割符, 默认为`,`。 + - `line_delimiter`:(选填) 行分割符,默认为`\n`。 + - `compress_type`: (选填) 目前支持 `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`。 默认值为 `UNKNOWN`, 将会根据 `uri` 的后缀自动推断类型。 + +- 以下参数适用于json格式的导入,具体使用方法可以参照:[Json Load](../../../data-operate/import/import-way/load-json-format.md) + + - `read_json_by_line`: (选填) 默认为 `"true"` + - `strip_outer_array`: (选填) 默认为 `"false"` + - `json_root`: (选填) 默认为空 + - `json_paths`: (选填) 默认为空 + - `num_as_string`: (选填) 默认为 `false` + - `fuzzy_parse`: (选填) 默认为 `false` + +- 以下参数适用于csv格式的导入: + + - `trim_double_quotes`: 布尔类型,选填,默认值为 `false`,为 `true` 时表示裁剪掉 csv 文件每个字段最外层的双引号 + - `skip_lines`: 整数类型,选填,默认值为0,含义为跳过csv文件的前几行。当设置format设置为 `csv_with_names` 或 `csv_with_names_and_types` 时,该参数会失效 + +### Examples + +分析指定 BE 上的日志文件: + +```sql +mysql> select * from local( + "file_path" = "log/be.out", + "backend_id" = "10006", + "format" = "csv") + where c1 like "%start_time%" limit 10; ++--------------------------------------------------------+ +| c1 | ++--------------------------------------------------------+ +| start time: 2023年 08月 07日 星期一 23:20:32 CST | +| start time: 2023年 08月 07日 星期一 23:32:10 CST | +| start time: 2023年 08月 08日 星期二 00:20:50 CST | +| start time: 2023年 08月 08日 星期二 00:29:15 CST | ++--------------------------------------------------------+ +``` + +读取和访问位于路径`${DORIS_HOME}/student.csv`的 csv格式文件: + +```sql +mysql> select * from local( + "file_path" = "student.csv", + "backend_id" = "10003", + "format" = "csv"); ++------+---------+--------+ +| c1 | c2 | c3 | ++------+---------+--------+ +| 1 | alice | 18 | +| 2 | bob | 20 | +| 3 | jack | 24 | +| 4 | jackson | 19 | +| 5 | liming | d18 | ++------+---------+--------+ +``` + +访问 NAS 上的共享数据: 
+ +```sql +mysql> select * from local( + "file_path" = "/mnt/doris/prefix_*.txt", + "format" = "csv", + "column_separator" =",", + "shared_storage" = "true"); ++------+------+------+ +| c1 | c2 | c3 | ++------+------+------+ +| 1 | 2 | 3 | +| 1 | 2 | 3 | +| 1 | 2 | 3 | +| 1 | 2 | 3 | +| 1 | 2 | 3 | ++------+------+------+ +``` + +可以配合`desc function`使用 + +```sql +mysql> desc function local( + "file_path" = "student.csv", + "backend_id" = "10003", + "format" = "csv"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| c1 | TEXT | Yes | false | NULL | NONE | +| c2 | TEXT | Yes | false | NULL | NONE | +| c3 | TEXT | Yes | false | NULL | NONE | ++-------+------+------+-------+---------+-------+ +``` + +### Keywords + + local, table-valued-function, tvf + +### Best Practice + +- 关于 local tvf 的更详细使用方法可以参照 [S3](./s3.md) tvf, 唯一不同的是访问存储系统的方式不一样。 + +- 通过 local tvf 访问 NAS 上的数据 + + NAS 共享存储允许同时挂载到多个节点。每个节点都可以像访问本地文件一样访问共享存储中的文件。因此,可以将 NAS 视为本地文件系统,通过 local tvf 进行访问。 + + 当设置 `"shared_storage" = "true"` 时,Doris 会认为所指定的文件可以在任意 BE 节点访问。当使用通配符指定了一组文件时,Doris 会将访问文件的请求分发到多个 BE 节点上,这样可以利用多个节点的进行分布式文件扫描,提升查询性能。 + + + + + + + + + diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/mv_infos.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/mv_infos.md new file mode 100644 index 0000000000000..67c7b58ffcb9d --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/mv_infos.md @@ -0,0 +1,102 @@ +--- +{ + "title": "MV_INFOS", + "language": "zh-CN" +} +--- + + + +## `mv_infos` + +### Name + +mv_infos + +### description + +表函数,生成异步物化视图临时表,可以查看某个db中创建的异步物化视图信息。 + +该函数用于 from 子句中。 + +该函数自 2.1.0 版本支持。 + +#### syntax + +`mv_infos("database"="")` + +mv_infos()表结构: +```sql +mysql> desc function mv_infos("database"="tpch100"); ++--------------------+---------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++--------------------+---------+------+-------+---------+-------+ +| Id | BIGINT | No | false | NULL | NONE | +| Name | TEXT | No | false | NULL | NONE | +| JobName | TEXT | No | false | NULL | NONE | +| State | TEXT | No | false | NULL | NONE | +| SchemaChangeDetail | TEXT | No | false | NULL | NONE | +| RefreshState | TEXT | No | false | NULL | NONE | +| RefreshInfo | TEXT | No | false | NULL | NONE | +| QuerySql | TEXT | No | false | NULL | NONE | +| EnvInfo | TEXT | No | false | NULL | NONE | +| MvProperties | TEXT | No | false | NULL | NONE | +| MvPartitionInfo | TEXT | No | false | NULL | NONE | +| SyncWithBaseTables | BOOLEAN | No | false | NULL | NONE | ++--------------------+---------+------+-------+---------+-------+ +12 rows in set (0.01 sec) +``` + +* Id:物化视图id +* Name:物化视图Name +* JobName:物化视图对应的job名称 +* State:物化视图状态 +* SchemaChangeDetail:物化视图State变为SchemaChange的原因 +* RefreshState:物化视图刷新状态 +* RefreshInfo:物化视图定义的刷新策略信息 +* QuerySql:物化视图定义的查询语句 +* EnvInfo:物化视图创建时的环境信息 +* MvProperties:物化视属性 +* MvPartitionInfo:物化视图的分区信息 +* SyncWithBaseTables:是否和base表数据同步,如需查看哪个分区不同步,请使用[SHOW PARTITIONS](../sql-reference/Show-Statements/SHOW-PARTITIONS.md) + +### example + +1. 查看db1下的所有物化视图 + +```sql +mysql> select * from mv_infos("database"="db1"); +``` + +2. 查看db1下的物化视图名称为mv1的物化视图 + +```sql +mysql> select * from mv_infos("database"="db1") where Name = "mv1"; +``` + +3. 
查看db1下的物化视图名称为mv1的状态 + +```sql +mysql> select State from mv_infos("database"="db1") where Name = "mv1"; +``` + +### keywords + + mv, infos diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/partitions.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/partitions.md new file mode 100644 index 0000000000000..ce25fc0240cd3 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/partitions.md @@ -0,0 +1,130 @@ +--- +{ + "title": "PARTITIONS", + "language": "zh-CN" +} +--- + + + +## `partitions` + +### Name + +partitions + +### Description + +表函数,生成分区临时表,可以查看某个 TABLE 的分区列表。 + +该函数用于 From 子句中。 + +该函数自 2.1.5 版本开始支持。 + +#### Syntax + +`partitions("catalog"="","database"="","table"="")` + +partitions()表结构: +```sql +mysql> desc function partitions("catalog"="internal","database"="zd","table"="user"); ++--------------------------+---------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++--------------------------+---------+------+-------+---------+-------+ +| PartitionId | BIGINT | No | false | NULL | NONE | +| PartitionName | TEXT | No | false | NULL | NONE | +| VisibleVersion | BIGINT | No | false | NULL | NONE | +| VisibleVersionTime | TEXT | No | false | NULL | NONE | +| State | TEXT | No | false | NULL | NONE | +| PartitionKey | TEXT | No | false | NULL | NONE | +| Range | TEXT | No | false | NULL | NONE | +| DistributionKey | TEXT | No | false | NULL | NONE | +| Buckets | INT | No | false | NULL | NONE | +| ReplicationNum | INT | No | false | NULL | NONE | +| StorageMedium | TEXT | No | false | NULL | NONE | +| CooldownTime | TEXT | No | false | NULL | NONE | +| RemoteStoragePolicy | TEXT | No | false | NULL | NONE | +| LastConsistencyCheckTime | TEXT | No | false | NULL | NONE | +| DataSize | TEXT | No | false | NULL | NONE | +| IsInMemory | BOOLEAN | No | false | NULL | NONE | +| ReplicaAllocation | TEXT | No | false | NULL | NONE | +| IsMutable | BOOLEAN | No | false | NULL | NONE | +| SyncWithBaseTables | BOOLEAN | No | false | NULL | NONE | +| UnsyncTables | TEXT | No | false | NULL | NONE | ++--------------------------+---------+------+-------+---------+-------+ +20 rows in set (0.02 sec) +``` + +* PartitionId:分区id +* PartitionName:分区名字 +* VisibleVersion:分区版本 +* VisibleVersionTime:分区版本提交时间 +* State:分区状态 +* PartitionKey:分区key +* Range:分区范围 +* DistributionKey:分布key +* Buckets:分桶数量 +* ReplicationNum:副本数 +* StorageMedium:存储介质 +* CooldownTime:cooldown时间 +* RemoteStoragePolicy:远程存储策略 +* LastConsistencyCheckTime:上次一致性检查时间 +* DataSize:数据大小 +* IsInMemory:是否存在内存 +* ReplicaAllocation:分布策略 +* IsMutable:是否可变 +* SyncWithBaseTables:是否和基表数据同步(针对异步物化视图的分区) +* UnsyncTables:和哪个基表数据不同步(针对异步物化视图的分区) + +```sql +mysql> desc function partitions("catalog"="hive","database"="zdtest","table"="com2"); ++-----------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-----------+------+------+-------+---------+-------+ +| Partition | TEXT | No | false | NULL | NONE | ++-----------+------+------+-------+---------+-------+ +1 row in set (0.11 sec) +``` + +* Partition:分区名字 + +### Example + +1. 查看 internal CATALOG 下 db1 的 table1 的分区列表 + +```sql +mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1"); +``` + +2. 
查看 table1 下的分区名称为 partition1 的分区信息 + +```sql +mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1"; +``` + +3. 查看 table1 下的分区名称为 partition1 的分区 id + +```sql +mysql> select PartitionId from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1"; +``` + +### Keywords + + partitions diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/query.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/query.md new file mode 100644 index 0000000000000..3e7e715db076b --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/query.md @@ -0,0 +1,110 @@ +--- +{ +"title": "QUERY", +"language": "zh-CN" +} +--- + + + +## query + +### Name + +query + +### description + +query 表函数(table-valued-function,tvf),可用于将查询语句直接透传到某个 catalog 进行数据查询 + +:::info note +Doris 2.1.3 版本开始支持,当前仅支持透传查询 jdbc catalog。 +需要先在 Doris 中创建对应的 catalog。 +::: + +#### syntax + +```sql +query( + "catalog" = "catalog_name", + "query" = "select * from db_name.table_name where condition" + ); +``` + +**参数说明** + +query表函数 tvf中的每一个参数都是一个 `"key"="value"` 对。 +相关参数: +- `catalog`: (必填) catalog名称,需要按照catalog的名称填写。 +- `query`: (必填) 需要执行的查询语句。 + +### Example + +使用 query 函数查询 jdbc 数据源中的表 + +```sql +select * from query("catalog" = "jdbc", "query" = "select * from db_name.table_name where condition"); +``` + +可以配合`desc function`使用 + +```sql +desc function query("catalog" = "jdbc", "query" = "select * from db_name.table_name where condition"); +``` + +### Keywords + + query, table-valued-function, tvf + +### Best Prac + +透传查询 jdbc catalog 数据源中的表 + +```sql +select * from query("catalog" = "jdbc", "query" = "select * from test.student"); ++------+---------+ +| id | name | ++------+---------+ +| 1 | alice | +| 2 | bob | +| 3 | jack | ++------+---------+ +select * from query("catalog" = "jdbc", "query" = "select * from test.score"); ++------+---------+ +| id | score | ++------+---------+ +| 1 | 100 | +| 2 | 90 | +| 3 | 80 | ++------+---------+ +``` + +透传关联查询 jdbc catalog 数据源中的表 + +```sql +select * from query("catalog" = "jdbc", "query" = "select a.id, a.name, b.score from test.student a join test.score b on a.id = b.id"); ++------+---------+---------+ +| id | name | score | ++------+---------+---------+ +| 1 | alice | 100 | +| 2 | bob | 90 | +| 3 | jack | 80 | ++------+---------+---------+ +``` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/s3.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/s3.md new file mode 100644 index 0000000000000..8dd80cab100ee --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/s3.md @@ -0,0 +1,564 @@ +--- +{ + "title": "S3", + "language": "zh-CN" +} +--- + + + +## S3 + +### Name + +s3 + +### description + +S3 表函数(table-valued-function,tvf),可以让用户像访问关系表格式数据一样,读取并访问 S3 兼容的对象存储上的文件内容。目前支持`csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`文件格式。 + +**语法** + +```sql +s3( + "uri" = "..", + "s3.access_key" = "...", + "s3.secret_key" = "...", + "s3.region" = "...", + "format" = "csv", + "keyn" = "valuen", + ... 
+ ); +``` + +**参数说明** + +S3 TVF 中的每一个参数都是一个 `"key"="value"` 对。 +访问 S3 相关参数: +- `uri`: (必填) 访问 S3 的 URI,S3 表函数会根据 `use_path_style` 参数来决定是否使用 Path Style 访问方式,默认为 Virtual-hosted Style 方式 +- `s3.access_key`: (必填) +- `s3.secret_key`: (必填) +- `s3.region`: (选填)。如果 Minio 服务设置了其他的 Region,那么必填,否则默认使用`us-east-1`。 +- `s3.session_token`: (选填) +- `use_path_style`:(选填) 默认为`false` 。S3 SDK 默认使用 Virtual-hosted Syle 方式。但某些对象存储系统可能没开启或没支持 Virtual-hosted Style 方式的访问,此时我们可以添加 `use_path_style` 参数来强制使用 Path Style 方式。比如 `minio` 默认情况下只允许 `path style` 访问方式,所以在访问 MinIO 时要加上 `use_path_style=true`。 +- `force_parsing_by_standard_uri`:(选填)默认 `false` 。我们可以添加 `force_parsing_by_standard_uri` 参数来强制将非标准的 URI 解析为标准 URI。 + +> 对于 AWS S3,标准 uri styles 有以下几种: +> 1. AWS Client Style(Hadoop S3 Style): `s3://my-bucket/path/to/file?versionId=abc123&partNumber=77&partNumber=88`。 +> 2. Virtual Host Style:`https://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`。 +> 3. Path Style:`https://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`。 +> +> 除了支持以上三个标准常见的 URI Styles, 还支持其他一些 URI Styles(也许不常见,但也有可能有): +> 1. Virtual Host AWS Client (Hadoop S3) Mixed Style: +> `s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88` +> 2. Path AWS Client (Hadoop S3) Mixed Style: +> `s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88` +> +> 详细使用案例可以参考最下方 Best Practice。 + +文件格式参数: +- `format`:(必填) 目前支持 `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` +- `column_separator`:(选填) 列分割符,默认为`\t`。 +- `line_delimiter`:(选填) 行分割符,默认为`\n`。 +- `compress_type`: (选填) 目前支持 `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`。默认值为 `UNKNOWN`, 将会根据 `uri` 的后缀自动推断类型。 + +下面 6 个参数是用于 JSON 格式的导入,具体使用方法可以参照:[Json Load](../../../data-operate/import/import-way/load-json-format.md) + +- `read_json_by_line`: (选填) 默认为 `"true"` +- `strip_outer_array`: (选填) 默认为 `"false"` +- `json_root`: (选填) 默认为空 +- `jsonpaths`: (选填) 默认为空 +- `num_as_string`: (选填) 默认为 `false` +- `fuzzy_parse`: (选填) 默认为 `false` + +下面 2 个参数是用于 CSV 格式的导入 + +- `trim_double_quotes`:布尔类型,选填,默认值为 `false`,为 `true` 时表示裁剪掉 CSV 文件每个字段最外层的双引号 +- `skip_lines`:整数类型,选填,默认值为 0,含义为跳过 CSV 文件的前几行。当设置 format 设置为 `csv_with_names` 或 `csv_with_names_and_types` 时,该参数会失效 + +其他参数: +- `path_partition_keys`:(选填)指定文件路径中携带的分区列名,例如 `/path/to/city=beijing/date="2023-07-09"`, 则填写 `path_partition_keys="city,date"`,将会自动从路径中读取相应列名和列值进行导入。 +- `resource`:(选填)指定 Resource 名,S3 TVF 可以利用已有的 S3 Resource 来直接访问 S3。创建 S3 Resource 的方法可以参照 [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md)。该功能自 2.1.4 版本开始支持。 + +:::tip 注意 +直接查询 TVF 或基于该 TVF 创建 View ,需要拥有该 Resource 的 USAGE 权限,查询基于 TVF 创建的 View ,只需要该 View 的 SELECT 权限。 +::: + +### Example + +读取并访问 S3 兼容的对象存储上的 CSV 格式文件 + +```sql +select * from s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "csv", + "use_path_style" = "true") order by c1; +``` + + +可以配合 `desc function` 使用 + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "csv", + "use_path_style" = "true"); +``` + +### Keywords + + S3, table-valued-function, TVF + +### Best Practice + +**不同 url schema 的写法** +http:// 、https:// 使用示例: +```sql +// 注意URI Bucket写法以及`use_path_style`参数设置,HTTP 同理。 +// 由于设置了 
`"use_path_style"="true"`, 所以将采用 Path Style 的方式访问 S3。 +select * from s3( + "uri" = "https://endpoint/bucket/file/student.csv", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="true"); + +// 注意 URI Bucket写法以及use_path_style参数设置,http同理。 +// 由于设置了 `"use_path_style"="false"`, 所以将采用 Virtual-hosted Style 方式访问 S3。 +select * from s3( + "uri" = "https://bucket.endpoint/bucket/file/student.csv", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="false"); + +// 阿里云 OSS 和腾讯云 COS 采用 Virtual-hosted Style 方式访问 S3。 +// OSS +select * from s3( + "uri" = "http://example-bucket.oss-cn-beijing.aliyuncs.com/your-folder/file.parquet", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "s3.region" = "oss-cn-beijing", + "format" = "parquet", + "use_path_style" = "false"); +// COS +select * from s3( + "uri" = "https://example-bucket.cos.ap-hongkong.myqcloud.com/your-folder/file.parquet", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "s3.region" = "ap-hongkong", + "format" = "parquet", + "use_path_style" = "false"); + +// MinIO +select * from s3( + "uri" = "s3://bucket/file.csv", + "s3.endpoint" = "http://172.21.0.101:9000", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "s3.region" = "us-east-1", + "format" = "csv" +); + +// 百度云 BOS 采用兼容 S3 协议的 Virtual-hosted Style 方式访问 S3。 +// BOS +select * from s3( + "uri" = "https://example-bucket.s3.bj.bcebos.com/your-folder/file.parquet", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "s3.region" = "bj", + "format" = "parquet", + "use_path_style" = "false"); +``` + +s3:// 使用示例: + +```sql +// 注意 URI Bucket 写法, 无需设置 `use_path_style` 参数。 +// 将采用 Virtual-hosted Style 方式访问 S3。 +select * from s3( + "uri" = "s3://bucket/file/student.csv", + "s3.endpoint"= "endpont", + "s3.region"= "region", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv"); +``` + +其它支持的 URI 风格示例: + +```sql +// Virtual Host AWS Client (Hadoop S3) Mixed Style。通过设置 `use_path_style = false` 以及 `force_parsing_by_standard_uri = true` 来使用。 +select * from s3( + "URI" = "s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="false", + "force_parsing_by_standard_uri"="true"); + +// Path AWS Client (Hadoop S3) Mixed Style。通过设置 `use_path_style = true` 以及 `force_parsing_by_standard_uri = true` 来使用。 +select * from s3( + "URI" = "s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="true", + "force_parsing_by_standard_uri"="true"); +``` + + +**CSV format** +由于 S3 table-valued-function 事先并不知道 Table Schema,所以会先读一遍文件来解析出 Table Schema。 + +`csv` 格式:S3 table-valued-function 读取 S3 上的文件并当作 CSV 文件来处理,读取文件中的第一行用于解析 Table Schema。文件第一行的列个数 `n` 将作为 Table Schema 的列个数,Table Schema 的列名则自动取名为 `c1, c2, ..., cn` ,列类型都设置为 `String`, 举例: + +student1.csv 文件内容为: + +``` +1,ftw,12 +2,zs,18 +3,ww,20 +``` + +使用 S3 TVF + +```sql +MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv", +-> "use_path_style" = "true") order by c1; ++------+------+------+ +| c1 | c2 | c3 | ++------+------+------+ +| 1 | ftw | 12 | +| 2 | zs | 18 | +| 3 | ww | 20 | ++------+------+------+ +``` + +可以配合 `desc function S3()` 来查看 Table Schema + +```sql +MySQL 
[(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv", +-> "use_path_style" = "true"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| c1 | TEXT | Yes | false | NULL | NONE | +| c2 | TEXT | Yes | false | NULL | NONE | +| c3 | TEXT | Yes | false | NULL | NONE | ++-------+------+------+-------+---------+-------+ +``` + +**csv_with_names format** +`csv_with_names` 格式:解析文件的第一行作为 Table Schema 的列个数和列名,列类型则都设置为 `String`, 举例: + +student_with_names.csv 文件内容为 + +``` +id,name,age +1,ftw,12 +2,zs,18 +3,ww,20 +``` + +使用 S3 tvf + +```sql +MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student_with_names.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names", +-> "use_path_style" = "true") order by id; ++------+------+------+ +| id | name | age | ++------+------+------+ +| 1 | ftw | 12 | +| 2 | zs | 18 | +| 3 | ww | 20 | ++------+------+------+ +``` + +同样配合 `desc function S3()` 可查看 Table Schema + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student_with_names.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names", +-> "use_path_style" = "true"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| id | TEXT | Yes | false | NULL | NONE | +| name | TEXT | Yes | false | NULL | NONE | +| age | TEXT | Yes | false | NULL | NONE | ++-------+------+------+-------+---------+-------+ +``` + +**csv_with_names_and_types foramt** + +`csv_with_names_and_types` 格式:目前暂不支持从 CSV 文件中解析出 Column Type。使用该 Format 时,S3 TVF 会解析文件的第一行作为 Table Schema 的列个数和列名,列类型则都设置为 String,同时将忽略该文件的第二行。 + +`student_with_names_and_types.csv` 文件内容为 + +``` +id,name,age +INT,STRING,INT +1,ftw,12 +2,zs,18 +3,ww,20 +``` + +使用 S3 TVF + +```sql +MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student_with_names_and_types.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names_and_types", +-> "use_path_style" = "true") order by id; ++------+------+------+ +| id | name | age | ++------+------+------+ +| 1 | ftw | 12 | +| 2 | zs | 18 | +| 3 | ww | 20 | ++------+------+------+ +``` + +同样配合 `desc function S3()` 可查看 Table Schema + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student_with_names_and_types.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names_and_types", +-> "use_path_style" = "true"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| id | TEXT | Yes | false | NULL | NONE | +| name | TEXT | Yes | false | NULL | NONE | +| age | TEXT | Yes | false | NULL | NONE | ++-------+------+------+-------+---------+-------+ +``` + +**JSON format** + +`json` 格式:JSON 格式涉及到较多的可选参数,各个参数的意义可以参考:[Json Load](../../../data-operate/import/import-way/load-json-format.md)。S3 TVF 查询 JSON 格式文件时根据 `json_root` 和 `jsonpaths` 参数定位到一个 JSON 对象,将该对象的中的 `key` 作为 Table Schema 的列名,列类型都设置为 String。举例: + +data.json 文件 + +``` +[{"id":1, "name":"ftw", "age":18}] +[{"id":2, "name":"xxx", "age":17}] +[{"id":3, "name":"yyy", "age":19}] +``` + +使用 S3 
TVF 查询 + +```sql +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/data.json", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "json", + "strip_outer_array" = "true", + "read_json_by_line" = "true", + "use_path_style"="true"); ++------+------+------+ +| id | name | age | ++------+------+------+ +| 1 | ftw | 18 | +| 2 | xxx | 17 | +| 3 | yyy | 19 | ++------+------+------+ + +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/data.json", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "json", + "strip_outer_array" = "true", + "jsonpaths" = "[\"$.id\", \"$.age\"]", + "use_path_style"="true"); ++------+------+ +| id | age | ++------+------+ +| 1 | 18 | +| 2 | 17 | +| 3 | 19 | ++------+------+ +``` + +**Parquet format** + +`parquet` 格式:S3 TVF 支持从 Parquet 文件中解析出 Table Schema 的列名、列类型。举例: + +```sql +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/test.snappy.parquet", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "parquet", + "use_path_style"="true") limit 5; ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| p_partkey | p_name | p_mfgr | p_brand | p_type | p_size | p_container | p_retailprice | p_comment | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| 1 | goldenrod lavender spring chocolate lace | Manufacturer#1 | Brand#13 | PROMO BURNISHED COPPER | 7 | JUMBO PKG | 901 | ly. slyly ironi | +| 2 | blush thistle blue yellow saddle | Manufacturer#1 | Brand#13 | LARGE BRUSHED BRASS | 1 | LG CASE | 902 | lar accounts amo | +| 3 | spring green yellow purple cornsilk | Manufacturer#4 | Brand#42 | STANDARD POLISHED BRASS | 21 | WRAP CASE | 903 | egular deposits hag | +| 4 | cornflower chocolate smoke green pink | Manufacturer#3 | Brand#34 | SMALL PLATED BRASS | 14 | MED DRUM | 904 | p furiously r | +| 5 | forest brown coral puff cream | Manufacturer#3 | Brand#32 | STANDARD POLISHED TIN | 15 | SM PKG | 905 | wake carefully | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +``` + +```sql +MySQL [(none)]> desc function s3( + "uri" = "http://127.0.0.1:9312/test2/test.snappy.parquet", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "parquet", + "use_path_style"="true"); ++---------------+--------------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++---------------+--------------+------+-------+---------+-------+ +| p_partkey | INT | Yes | false | NULL | NONE | +| p_name | TEXT | Yes | false | NULL | NONE | +| p_mfgr | TEXT | Yes | false | NULL | NONE | +| p_brand | TEXT | Yes | false | NULL | NONE | +| p_type | TEXT | Yes | false | NULL | NONE | +| p_size | INT | Yes | false | NULL | NONE | +| p_container | TEXT | Yes | false | NULL | NONE | +| p_retailprice | DECIMAL(9,0) | Yes | false | NULL | NONE | +| p_comment | TEXT | Yes | false | NULL | NONE | ++---------------+--------------+------+-------+---------+-------+ +``` + +**orc format** + +`orc` 格式:和 `parquet` format 使用方法一致,将 `format` 参数设置为 `orc`。 + +```sql +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/test.snappy.orc", + 
"s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "orc", + "use_path_style"="true") limit 5; ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| p_partkey | p_name | p_mfgr | p_brand | p_type | p_size | p_container | p_retailprice | p_comment | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| 1 | goldenrod lavender spring chocolate lace | Manufacturer#1 | Brand#13 | PROMO BURNISHED COPPER | 7 | JUMBO PKG | 901 | ly. slyly ironi | +| 2 | blush thistle blue yellow saddle | Manufacturer#1 | Brand#13 | LARGE BRUSHED BRASS | 1 | LG CASE | 902 | lar accounts amo | +| 3 | spring green yellow purple cornsilk | Manufacturer#4 | Brand#42 | STANDARD POLISHED BRASS | 21 | WRAP CASE | 903 | egular deposits hag | +| 4 | cornflower chocolate smoke green pink | Manufacturer#3 | Brand#34 | SMALL PLATED BRASS | 14 | MED DRUM | 904 | p furiously r | +| 5 | forest brown coral puff cream | Manufacturer#3 | Brand#32 | STANDARD POLISHED TIN | 15 | SM PKG | 905 | wake carefully | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +``` +**avro format** + +`avro` 格式:S3 TVF 支持从 avro 文件中解析出 Table Schema 的列名、列类型。举例: + +```sql +select * from s3( + "uri" = "http://127.0.0.1:9312/test2/person.avro", + "ACCESS_KEY" = "ak", + "SECRET_KEY" = "sk", + "FORMAT" = "avro"); ++--------+--------------+-------------+-----------------+ +| name | boolean_type | double_type | long_type | ++--------+--------------+-------------+-----------------+ +| Alyssa | 1 | 10.0012 | 100000000221133 | +| Ben | 0 | 5555.999 | 4009990000 | +| lisi | 0 | 5992225.999 | 9099933330 | ++--------+--------------+-------------+-----------------+ +``` + +**URI 包含通配符** + +URI 可以使用通配符来读取多个文件。注意:如果使用通配符要保证各个文件的格式是一致的 (尤其是 `csv`/`csv_with_names`/`csv_with_names_and_types` 算做不同的格式),S3 TVF 用第一个文件来解析出 Table Schema。 +如下两个 CSV 文件: + +``` +// file1.csv +1,aaa,18 +2,qqq,20 +3,qwe,19 + +// file2.csv +5,cyx,19 +6,ftw,21 +``` + +可以在 URI 上使用通配符来导入。 + +```sql +MySQL [(none)]> select * from s3( + "uri" = "http://127.0.0.1:9312/test2/file*.csv", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "csv", + "use_path_style"="true"); ++------+------+------+ +| c1 | c2 | c3 | ++------+------+------+ +| 1 | aaa | 18 | +| 2 | qqq | 20 | +| 3 | qwe | 19 | +| 5 | cyx | 19 | +| 6 | ftw | 21 | ++------+------+------+ +``` + +**配合 `insert into` 和 `cast` 使用 `S3` TVF** + +```sql +// 创建 Doris 内部表 +CREATE TABLE IF NOT EXISTS ${testTable} + ( + id int, + name varchar(50), + age int + ) + COMMENT "my first table" + DISTRIBUTED BY HASH(id) BUCKETS 32 + PROPERTIES("replication_num" = "1"); + +// 使用 S3 插入数据 +insert into ${testTable} (id,name,age) +select cast (id as INT) as id, name, cast (age as INT) as age +from s3( + "uri" = "${uri}", + "s3.access_key"= "${ak}", + "s3.secret_key" = "${sk}", + "format" = "${format}", + "strip_outer_array" = "true", + "read_json_by_line" = "true", + "use_path_style" = "true"); +``` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/tasks.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/tasks.md new file mode 100644 index 
0000000000000..5cdeac64e21dc --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/sql-manual/sql-functions/table-valued-functions/tasks.md @@ -0,0 +1,168 @@ +--- +{ + "title": "TASKS", + "language": "zh-CN" +} +--- + + + +## `tasks` + +### Name + +tasks + +### description + +表函数,生成 tasks 临时表,可以查看当前 doris 集群中的 job 产生的 tasks 信息。 + +该函数用于 from 子句中。 + +该函数自 2.1.0 版本支持。 + +#### syntax + +`tasks("type"="insert");` +**参数说明** + +| 参数名 | 说明 | 类型 | 是否必填 | +|:-----|:-------|:-------|:-----| +| type | 作业类型 | string | 是 | + +type 支持的类型: + +- insert:insert into 类型的任务。 +- mv: mv 类型的任务 + +##### Insert tasks +`tasks("type"="insert");`表结构: +``` +mysql> desc function tasks("type"="insert");; ++---------------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++---------------+------+------+-------+---------+-------+ +| TaskId | TEXT | No | false | NULL | NONE | +| JobId | TEXT | No | false | NULL | NONE | +| JobName | TEXT | No | false | NULL | NONE | +| Label | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| ErrorMsg | TEXT | No | false | NULL | NONE | +| CreateTime | TEXT | No | false | NULL | NONE | +| FinishTime | TEXT | No | false | NULL | NONE | +| TrackingUrl | TEXT | No | false | NULL | NONE | +| LoadStatistic | TEXT | No | false | NULL | NONE | +| User | TEXT | No | false | NULL | NONE | ++---------------+------+------+-------+---------+-------+ +11 row in set (0.01 sec) +``` +- TaskId:task id +- JobId:job id +- JobName:job 名称 +- Label:label +- Status:task 状态 +- ErrorMsg:task 失败信息 +- CreateTime:task 创建时间 +- FinishTime:task 结束时间 +- TrackingUrl:task tracking url +- LoadStatistic:task 统计信息 +- User:执行用户 +##### MV tasks +```sql +mysql> desc function tasks("type"="mv"); ++-----------------------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-----------------------+------+------+-------+---------+-------+ +| TaskId | TEXT | No | false | NULL | NONE | +| JobId | TEXT | No | false | NULL | NONE | +| JobName | TEXT | No | false | NULL | NONE | +| MvId | TEXT | No | false | NULL | NONE | +| MvName | TEXT | No | false | NULL | NONE | +| MvDatabaseId | TEXT | No | false | NULL | NONE | +| MvDatabaseName | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| ErrorMsg | TEXT | No | false | NULL | NONE | +| CreateTime | TEXT | No | false | NULL | NONE | +| StartTime | TEXT | No | false | NULL | NONE | +| FinishTime | TEXT | No | false | NULL | NONE | +| DurationMs | TEXT | No | false | NULL | NONE | +| TaskContext | TEXT | No | false | NULL | NONE | +| RefreshMode | TEXT | No | false | NULL | NONE | +| NeedRefreshPartitions | TEXT | No | false | NULL | NONE | +| CompletedPartitions | TEXT | No | false | NULL | NONE | +| Progress | TEXT | No | false | NULL | NONE | ++-----------------------+------+------+-------+---------+-------+ +18 rows in set (0.00 sec) +``` +* TaskId:task id +* JobId:job id +* JobName:job 名称 +* MvId:物化视图 id +* MvName:物化视图名称 +* MvDatabaseId:物化视图所属 db id +* MvDatabaseName:物化视图所属 db 名称 +* Status:task 状态 +* ErrorMsg:task 失败信息 +* CreateTime:task 创建时间 +* StartTime:task 开始运行时间 +* FinishTime:task 结束运行时间 +* DurationMs:task 运行时间 +* TaskContext:task 运行参数 +* RefreshMode:刷新模式 +* NeedRefreshPartitions:本次 task 需要刷新的分区信息 +* CompletedPartitions:本次 task 刷新完成的分区信息 +* Progress:task 运行进度 + +### example +#### Insert Tasks +``` +mysql> select * from tasks("type"="insert") limit 1 \G +*************************** 1. 
row *************************** + TaskId: 667704038678903 + JobId: 10069 + Label: 10069_667704038678903 + Status: FINISHED + EtlInfo: \N + TaskInfo: cluster:N/A; timeout(s):14400; max_filter_ratio:0.0; priority:NORMAL + ErrorMsg: \N + CreateTimeMs: 2023-12-08 16:46:57 + FinishTimeMs: 2023-12-08 16:46:57 + TrackingUrl: +LoadStatistic: {"Unfinished backends":{},"ScannedRows":0,"TaskNumber":0,"LoadBytes":0,"All backends":{},"FileNumber":0,"FileSize":0} + User: root +1 row in set (0.05 sec) + +``` +#### MV Tasks +1. 查看所有物化视图的 task + +```sql +mysql> select * from tasks("type"="mv"); +``` + +2. 查看 jobName 为`inner_mtmv_75043`的所有 task + +```sql +mysql> select * from tasks("type"="mv") where JobName="inner_mtmv_75043"; +``` + +### keywords + + tasks, job, insert, mv, materilized view diff --git a/sidebars.json b/sidebars.json index ebfa49e556b4a..0af8578d3bd7f 100644 --- a/sidebars.json +++ b/sidebars.json @@ -1064,21 +1064,26 @@ "sql-manual/sql-functions/table-functions/explode-bitmap", "sql-manual/sql-functions/table-functions/numbers", "sql-manual/sql-functions/table-functions/explode-numbers", - "sql-manual/sql-functions/table-functions/explode-numbers-outer", - "sql-manual/sql-functions/table-functions/s3", - "sql-manual/sql-functions/table-functions/hdfs", - "sql-manual/sql-functions/table-functions/local", - "sql-manual/sql-functions/table-functions/iceberg-meta", - "sql-manual/sql-functions/table-functions/backends", - "sql-manual/sql-functions/table-functions/frontends", - "sql-manual/sql-functions/table-functions/workload-group", - "sql-manual/sql-functions/table-functions/catalogs", - "sql-manual/sql-functions/table-functions/frontends_disks", - "sql-manual/sql-functions/table-functions/active_queries", - "sql-manual/sql-functions/table-functions/jobs", - "sql-manual/sql-functions/table-functions/mv_infos", - "sql-manual/sql-functions/table-functions/tasks", - "sql-manual/sql-functions/table-functions/query" + "sql-manual/sql-functions/table-functions/explode-numbers-outer" + ] + }, + { + "type": "category", + "label": "Table Valued Functions", + "items": [ + "sql-manual/sql-functions/table-valued-functions/s3", + "sql-manual/sql-functions/table-valued-functions/hdfs", + "sql-manual/sql-functions/table-valued-functions/local", + "sql-manual/sql-functions/table-valued-functions/query", + "sql-manual/sql-functions/table-valued-functions/iceberg-meta", + "sql-manual/sql-functions/table-valued-functions/backends", + "sql-manual/sql-functions/table-valued-functions/frontends", + "sql-manual/sql-functions/table-valued-functions/frontends_disks", + "sql-manual/sql-functions/table-valued-functions/catalogs", + "sql-manual/sql-functions/table-valued-functions/jobs", + "sql-manual/sql-functions/table-valued-functions/mv_infos", + "sql-manual/sql-functions/table-valued-functions/partitions", + "sql-manual/sql-functions/table-valued-functions/tasks" ] }, { @@ -1671,4 +1676,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/versioned_docs/version-2.0/admin-manual/resource-admin/workload-group.md b/versioned_docs/version-2.0/admin-manual/resource-admin/workload-group.md index c6bdd18d02d64..96681a1fc95ea 100644 --- a/versioned_docs/version-2.0/admin-manual/resource-admin/workload-group.md +++ b/versioned_docs/version-2.0/admin-manual/resource-admin/workload-group.md @@ -62,7 +62,7 @@ properties ( "enable_memory_overcommit"="true" ). 
``` -For details on creating a workload group, see [CREATE-WORKLOAD-GROUP](../../sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-WORKLOAD-GROUP.md), and to delete a workload group, refer to [DROP-WORKLOAD-GROUP](../../sql-manual/sql-reference/Data-Definition-Statements/Drop/DROP-WORKLOAD-GROUP.md); to modify a workload group, refer to [ALTER-WORKLOAD-GROUP](../../sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-WORKLOAD-GROUP.md); to view the workload group, refer to: [WORKLOAD_GROUPS()](../../sql-manual/sql-functions/table-functions/workload-group.md) and [SHOW-WORKLOAD-GROUPS](../../sql-manual/sql-reference/Show-Statements/SHOW-WORKLOAD-GROUPS.md). +For details on creating a workload group, see [CREATE-WORKLOAD-GROUP](../../sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-WORKLOAD-GROUP.md), and to delete a workload group, refer to [DROP-WORKLOAD-GROUP](../../sql-manual/sql-reference/Data-Definition-Statements/Drop/DROP-WORKLOAD-GROUP.md); to modify a workload group, refer to [ALTER-WORKLOAD-GROUP](../../sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-WORKLOAD-GROUP.md); to view the workload group, refer to: [WORKLOAD_GROUPS()](../../sql-manual/sql-functions/table-valued-functions/workload-group.md) and [SHOW-WORKLOAD-GROUPS](../../sql-manual/sql-reference/Show-Statements/SHOW-WORKLOAD-GROUPS.md). 4. turn on the pipeline execution engine, the workload group cpu isolation is based on the implementation of the pipeline execution engine, so you need to turn on the session variable: @@ -106,4 +106,4 @@ It should be noted that the current queuing design is not aware of the number of A Doris cluster is configured with a work load group and set max_concurrency=1, If there is only 1 FE in the cluster, then this workload group will only run one SQL at the same time from the Doris cluster perspective, -If there are 3 FEs, the maximum number of query that can be run in Doris cluster is 3. \ No newline at end of file +If there are 3 FEs, the maximum number of query that can be run in Doris cluster is 3. diff --git a/versioned_docs/version-2.0/data-operate/import/load-json-format.md b/versioned_docs/version-2.0/data-operate/import/load-json-format.md index 8bc2136b7a707..0359fc41eb221 100644 --- a/versioned_docs/version-2.0/data-operate/import/load-json-format.md +++ b/versioned_docs/version-2.0/data-operate/import/load-json-format.md @@ -31,7 +31,7 @@ Doris supports importing data in JSON format. This document mainly describes the Currently, only the following import methods support data import in JSON format: -- Through [S3 table function](../../sql-manual/sql-functions/table-functions/s3.md) import statement: insert into table select * from S3(); +- Through [S3 table function](../../sql-manual/sql-functions/table-valued-functions/s3.md) import statement: insert into table select * from S3(); - Import the local JSON format file through [STREAM LOAD](../../sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md). - Subscribe and consume JSON format in Kafka via [ROUTINE LOAD](../../sql-manual/sql-reference/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md) information. 
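+
+As an illustration of the first method, a minimal sketch of an import through the S3 table function might look as follows (the target table, bucket, and credentials are hypothetical; the parameters mirror the JSON example in the S3 tvf documentation):
+
+```sql
+-- hypothetical table and S3 location; JSON is read line by line
+INSERT INTO example_tbl
+SELECT * FROM S3(
+    "uri" = "http://127.0.0.1:9312/test2/data.json",
+    "s3.access_key" = "ak",
+    "s3.secret_key" = "sk",
+    "format" = "json",
+    "read_json_by_line" = "true",
+    "use_path_style" = "true");
+```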
diff --git a/versioned_docs/version-2.0/lakehouse/datalake-analytics/iceberg.md b/versioned_docs/version-2.0/lakehouse/datalake-analytics/iceberg.md index daec378de3da3..9fa4e52db8792 100644 --- a/versioned_docs/version-2.0/lakehouse/datalake-analytics/iceberg.md +++ b/versioned_docs/version-2.0/lakehouse/datalake-analytics/iceberg.md @@ -228,4 +228,4 @@ You can use the `FOR TIME AS OF` and `FOR VERSION AS OF` statements to read hist `SELECT * FROM iceberg_tbl FOR VERSION AS OF 868895038966572;` -In addition, you can use the [iceberg_meta](../../sql-manual/sql-functions/table-functions/iceberg-meta) table function to query the snapshot information of the specified table. +In addition, you can use the [iceberg_meta](../../sql-manual/sql-functions/table-valued-functions/iceberg-meta) table function to query the snapshot information of the specified table. diff --git a/versioned_docs/version-2.0/lakehouse/file.md b/versioned_docs/version-2.0/lakehouse/file.md index 710750f7b0bca..5694086386af1 100644 --- a/versioned_docs/version-2.0/lakehouse/file.md +++ b/versioned_docs/version-2.0/lakehouse/file.md @@ -30,9 +30,11 @@ With the Table Value Function feature, Doris is able to query files in object st For more usage details, please see the documentation: -* [S3](https://doris.apache.org/docs/dev/sql-manual/sql-functions/table-functions/s3/): supports file analysis on object storage compatible with S3 +* [S3](../sql-manual/sql-functions/table-valued-functions/s3.md): supports file analysis on object storage compatible with S3 -* [HDFS](https://doris.apache.org/docs/dev/sql-manual/sql-functions/table-functions/hdfs/): supports file analysis on HDFS +* [HDFS](../sql-manual/sql-functions/table-valued-functions/hdfs.md): supports file analysis on HDFS + +* [LOCAL](../sql-manual/sql-functions/table-valued-functions/local.md): supports file analysis on local file system The followings illustrate how file analysis is conducted with the example of S3 Table Value Function. diff --git a/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/backends.md b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/backends.md new file mode 100644 index 0000000000000..0f1476fb9fd2e --- /dev/null +++ b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/backends.md @@ -0,0 +1,112 @@ +--- +{ + "title": "BACKENDS", + "language": "en" +} +--- + + + +## `backends` + +### Name + +backends + +### description + +Table-Value-Function, generate a temporary table named `backends`. This tvf is used to view the information of BE nodes in the doris cluster. + +This function is used in `FROM` clauses. 
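+
+For instance, because the result behaves like an ordinary table, it can be filtered directly in a query; a minimal sketch (the `Alive` column is part of the schema shown below) is:
+
+```sql
+-- list only the alive BE nodes
+SELECT BackendId, Host, Alive FROM backends() WHERE Alive = true;
+```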
+ +#### syntax + +`backends()` + +The table schema of `backends()` tvf: +``` +mysql> desc function backends(); ++-------------------------+---------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------------+---------+------+-------+---------+-------+ +| BackendId | BIGINT | No | false | NULL | NONE | +| Host | TEXT | No | false | NULL | NONE | +| HeartbeatPort | INT | No | false | NULL | NONE | +| BePort | INT | No | false | NULL | NONE | +| HttpPort | INT | No | false | NULL | NONE | +| BrpcPort | INT | No | false | NULL | NONE | +| LastStartTime | TEXT | No | false | NULL | NONE | +| LastHeartbeat | TEXT | No | false | NULL | NONE | +| Alive | BOOLEAN | No | false | NULL | NONE | +| SystemDecommissioned | BOOLEAN | No | false | NULL | NONE | +| TabletNum | BIGINT | No | false | NULL | NONE | +| DataUsedCapacity | BIGINT | No | false | NULL | NONE | +| AvailCapacity | BIGINT | No | false | NULL | NONE | +| TotalCapacity | BIGINT | No | false | NULL | NONE | +| UsedPct | DOUBLE | No | false | NULL | NONE | +| MaxDiskUsedPct | DOUBLE | No | false | NULL | NONE | +| RemoteUsedCapacity | BIGINT | No | false | NULL | NONE | +| Tag | TEXT | No | false | NULL | NONE | +| ErrMsg | TEXT | No | false | NULL | NONE | +| Version | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| HeartbeatFailureCounter | INT | No | false | NULL | NONE | +| NodeRole | TEXT | No | false | NULL | NONE | ++-------------------------+---------+------+-------+---------+-------+ +23 rows in set (0.002 sec) +``` + +The information displayed by the `backends` tvf is basically consistent with the information displayed by the `show backends` statement. However, the types of each field in the `backends` tvf are more specific, and you can use the `backends` tvf to perform operations such as filtering and joining. + +The information displayed by the `backends` tvf is authenticated, which is consistent with the behavior of `show backends`, user must have ADMIN/OPERATOR privelege. + +### example +``` +mysql> select * from backends()\G +*************************** 1. 
row *************************** + BackendId: 10002 + Host: 10.xx.xx.90 + HeartbeatPort: 9053 + BePort: 9063 + HttpPort: 8043 + BrpcPort: 8069 + LastStartTime: 2023-06-15 16:51:02 + LastHeartbeat: 2023-06-15 17:09:58 + Alive: 1 + SystemDecommissioned: 0 + TabletNum: 21 + DataUsedCapacity: 0 + AvailCapacity: 5187141550081 + TotalCapacity: 7750977622016 + UsedPct: 33.077583202570978 + MaxDiskUsedPct: 33.077583202583881 + RemoteUsedCapacity: 0 + Tag: {"location" : "default"} + ErrMsg: + Version: doris-0.0.0-trunk-4b18cde0c7 + Status: {"lastSuccessReportTabletsTime":"2023-06-15 17:09:02","lastStreamLoadTime":-1,"isQueryDisabled":false,"isLoadDisabled":false} +HeartbeatFailureCounter: 0 + NodeRole: mix +1 row in set (0.038 sec) +``` + +### keywords + + backends \ No newline at end of file diff --git a/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/catalogs.md b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/catalogs.md new file mode 100644 index 0000000000000..e748297da7ff0 --- /dev/null +++ b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/catalogs.md @@ -0,0 +1,91 @@ +--- +{ + "title": "CATALOGS", + "language": "en" +} +--- + + + +## `catalogs` + +### Name + + +catalogs + + +### description + +The table function generates a temporary table of catalogs to view the information of the catalogs created in the current Doris. + +This function is used in the from clause. + +#### syntax + +`catalogs()` + +Catalogs () table structure: +``` +mysql> desc function catalogs(); ++-------------+--------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------+--------+------+-------+---------+-------+ +| CatalogId | BIGINT | No | false | NULL | NONE | +| CatalogName | TEXT | No | false | NULL | NONE | +| CatalogType | TEXT | No | false | NULL | NONE | +| Property | TEXT | No | false | NULL | NONE | +| Value | TEXT | No | false | NULL | NONE | ++-------------+--------+------+-------+---------+-------+ +5 rows in set (0.04 sec) +``` + +The information presented by `catalogs()` tvf is the result of synthesizing `show catalogs` and `show catalog xxx` statements. + +The table generated by tvf can be used for filtering, join and other operations. 
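+
+For instance, a minimal sketch of such filtering (the catalog type value is illustrative) is:
+
+```sql
+-- keep only the property rows of Hive Metastore catalogs
+SELECT CatalogName, Property, Value FROM catalogs() WHERE CatalogType = 'hms';
+```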
+
+
+### example
+
+```
+mysql> select * from catalogs();
++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+
+| CatalogId | CatalogName | CatalogType | Property | Value |
++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+
+| 16725 | hive | hms | dfs.client.failover.proxy.provider.HANN | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider |
+| 16725 | hive | hms | dfs.ha.namenodes.HANN | nn1,nn2 |
+| 16725 | hive | hms | create_time | 2023-07-13 16:24:38.968 |
+| 16725 | hive | hms | ipc.client.fallback-to-simple-auth-allowed | true |
+| 16725 | hive | hms | dfs.namenode.rpc-address.HANN.nn1 | nn1_host:rpc_port |
+| 16725 | hive | hms | hive.metastore.uris | thrift://127.0.0.1:7004 |
+| 16725 | hive | hms | dfs.namenode.rpc-address.HANN.nn2 | nn2_host:rpc_port |
+| 16725 | hive | hms | type | hms |
+| 16725 | hive | hms | dfs.nameservices | HANN |
+| 0 | internal | internal | NULL | NULL |
+| 16726 | es | es | create_time | 2023-07-13 16:24:44.922 |
+| 16726 | es | es | type | es |
+| 16726 | es | es | hosts | http://127.0.0.1:9200 |
++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+
+13 rows in set (0.01 sec)
+```
+
+### keywords
+
+    catalogs
diff --git a/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/frontends.md b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/frontends.md
new file mode 100644
index 0000000000000..e66ec1200a790
--- /dev/null
+++ b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/frontends.md
@@ -0,0 +1,102 @@
+---
+{
+  "title": "FRONTENDS",
+  "language": "en"
+}
+---
+
+
+
+## `frontends`
+
+### Name
+
+frontends
+
+### description
+
+Table-Value-Function, generates a temporary table named `frontends`. This tvf is used to view the information of FE nodes in the Doris cluster.
+
+This function is used in `FROM` clauses.
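+
+For instance, since all columns of this tvf are TEXT (see the schema below), a sketch that finds the current master FE compares against a string literal:
+
+```sql
+-- locate the master frontend; IsMaster holds the strings 'true' / 'false'
+SELECT Name, Host, QueryPort FROM frontends() WHERE IsMaster = 'true';
+```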
+
+#### syntax
+
+`frontends()`
+
+The table schema of `frontends()` tvf:
+```
+mysql> desc function frontends();
++-------------------+------+------+-------+---------+-------+
+| Field | Type | Null | Key | Default | Extra |
++-------------------+------+------+-------+---------+-------+
+| Name | TEXT | No | false | NULL | NONE |
+| Host | TEXT | No | false | NULL | NONE |
+| EditLogPort | TEXT | No | false | NULL | NONE |
+| HttpPort | TEXT | No | false | NULL | NONE |
+| QueryPort | TEXT | No | false | NULL | NONE |
+| RpcPort | TEXT | No | false | NULL | NONE |
+| ArrowFlightSqlPort| TEXT | No | false | NULL | NONE |
+| Role | TEXT | No | false | NULL | NONE |
+| IsMaster | TEXT | No | false | NULL | NONE |
+| ClusterId | TEXT | No | false | NULL | NONE |
+| Join | TEXT | No | false | NULL | NONE |
+| Alive | TEXT | No | false | NULL | NONE |
+| ReplayedJournalId | TEXT | No | false | NULL | NONE |
+| LastHeartbeat | TEXT | No | false | NULL | NONE |
+| IsHelper | TEXT | No | false | NULL | NONE |
+| ErrMsg | TEXT | No | false | NULL | NONE |
+| Version | TEXT | No | false | NULL | NONE |
+| CurrentConnected | TEXT | No | false | NULL | NONE |
++-------------------+------+------+-------+---------+-------+
+18 rows in set (0.022 sec)
+```
+
+The information displayed by the `frontends` tvf is basically consistent with the information displayed by the `show frontends` statement. However, the types of each field in the `frontends` tvf are more specific, and you can use the `frontends` tvf to perform operations such as filtering and joining.
+
+The information displayed by the `frontends` tvf is authenticated, which is consistent with the behavior of `show frontends`; the user must have ADMIN/OPERATOR privilege.
+
+### example
+```
+mysql> select * from frontends()\G
+*************************** 1. row ***************************
+ Name: fe_5fa8bf19_fd6b_45cb_89c5_25a5ebc45582
+ IP: 10.xx.xx.14
+ EditLogPort: 9013
+ HttpPort: 8034
+ QueryPort: 9033
+ RpcPort: 9023
+ArrowFlightSqlPort: 9040
+ Role: FOLLOWER
+ IsMaster: true
+ ClusterId: 1258341841
+ Join: true
+ Alive: true
+ReplayedJournalId: 186
+ LastHeartbeat: 2023-06-15 16:53:12
+ IsHelper: true
+ ErrMsg:
+ Version: doris-0.0.0-trunk-4b18cde0c7
+ CurrentConnected: Yes
+1 row in set (0.060 sec)
+```
+
+### keywords
+
+    frontends
\ No newline at end of file
diff --git a/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/frontends_disks.md b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/frontends_disks.md
new file mode 100644
index 0000000000000..0532fc477ebac
--- /dev/null
+++ b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/frontends_disks.md
@@ -0,0 +1,87 @@
+---
+{
+  "title": "FRONTENDS_DISKS",
+  "language": "en"
+}
+---
+
+
+
+## `frontends_disks`
+
+### Name
+
+frontends_disks
+
+### description
+
+Table-Value-Function, generates a temporary table named `frontends_disks`. This tvf is used to view the information of FE nodes' disks in the Doris cluster.
+
+This function is used in `FROM` clauses.
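+
+For instance, a quick sketch that narrows the result to one directory type (the value 'log' matches the example output below):
+
+```sql
+-- show disk usage of the FE log directories only
+SELECT Name, Dir, UseRate FROM frontends_disks() WHERE DirType = 'log';
+```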
+
+#### syntax
+
+`frontends_disks()`
+
+The table schema of `frontends_disks()` tvf:
+```
+mysql> desc function frontends_disks();
++-------------+------+------+-------+---------+-------+
+| Field | Type | Null | Key | Default | Extra |
++-------------+------+------+-------+---------+-------+
+| Name | TEXT | No | false | NULL | NONE |
+| Host | TEXT | No | false | NULL | NONE |
+| DirType | TEXT | No | false | NULL | NONE |
+| Dir | TEXT | No | false | NULL | NONE |
+| Filesystem | TEXT | No | false | NULL | NONE |
+| Capacity | TEXT | No | false | NULL | NONE |
+| Used | TEXT | No | false | NULL | NONE |
+| Available | TEXT | No | false | NULL | NONE |
+| UseRate | TEXT | No | false | NULL | NONE |
+| MountOn | TEXT | No | false | NULL | NONE |
++-------------+------+------+-------+---------+-------+
+10 rows in set (0.14 sec)
+```
+
+The information displayed by the `frontends_disks` tvf is basically consistent with the information displayed by the `show frontends disks` statement. However, the types of each field in the `frontends_disks` tvf are more specific, and you can use the `frontends_disks` tvf to perform operations such as filtering and joining.
+
+The information displayed by the `frontends_disks` tvf is authenticated, which is consistent with the behavior of `show frontends disks`; the user must have ADMIN/OPERATOR privilege.
+
+### example
+```
+mysql> select * from frontends_disks()\G
+*************************** 1. row ***************************
+ Name: fe_fe1d5bd9_d1e5_4ccc_9b03_ca79b95c9941
+ Host: 172.XX.XX.1
+ DirType: log
+ Dir: /data/doris/fe-github/log
+ Filesystem: /dev/sdc5
+ Capacity: 366G
+ Used: 119G
+ Available: 228G
+ UseRate: 35%
+ MountOn: /data
+......
+12 rows in set (0.03 sec)
+```
+
+### keywords
+
+    frontends_disks
diff --git a/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/hdfs.md b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/hdfs.md
new file mode 100644
index 0000000000000..7748a302ab48b
--- /dev/null
+++ b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/hdfs.md
@@ -0,0 +1,162 @@
+---
+{
+  "title": "HDFS",
+  "language": "en"
+}
+---
+
+
+
+## HDFS
+
+### Name
+
+hdfs
+
+### Description
+
+HDFS table-valued-function(tvf), allows users to read and access file contents on HDFS, just like accessing a relational table. Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` file formats.
+
+#### syntax
+
+```sql
+hdfs(
+  "uri" = "..",
+  "fs.defaultFS" = "...",
+  "hadoop.username" = "...",
+  "format" = "csv",
+  "keyn" = "valuen"
+  ...
+  );
+```
+
+**parameter description**
+
+Related parameters for accessing hdfs:
+
+- `uri`: (required) hdfs uri. If the uri path does not exist or the files are empty files, hdfs tvf will return an empty result set.
+- `fs.defaultFS`: (required)
+- `hadoop.username`: (required) Can be any string, but cannot be empty.
+- `hadoop.security.authentication`: (optional) +- `hadoop.username`: (optional) +- `hadoop.kerberos.principal`: (optional) +- `hadoop.kerberos.keytab`: (optional) +- `dfs.client.read.shortcircuit`: (optional) +- `dfs.domain.socket.path`: (optional) + +Related parameters for accessing HDFS in HA mode: + +- `dfs.nameservices`: (optional) +- `dfs.ha.namenodes.your-nameservices`: (optional) +- `dfs.namenode.rpc-address.your-nameservices.your-namenode`: (optional) +- `dfs.client.failover.proxy.provider.your-nameservices`: (optional) + +File format parameters: + +- `format`: (required) Currently support `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc/avro` +- `column_separator`: (optional) default `\t`. +- `line_delimiter`: (optional) default `\n`. +- `compress_type`: (optional) Currently support `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`. Default value is `UNKNOWN`, it will automatically infer the type based on the suffix of `uri`. + + The following 6 parameters are used for loading in json format. For specific usage methods, please refer to: [Json Load](../../../data-operate/import/import-way/load-json-format.md) + +- `read_json_by_line`: (optional) default `"true"` +- `strip_outer_array`: (optional) default `"false"` +- `json_root`: (optional) default `""` +- `json_paths`: (optional) default `""` +- `num_as_string`: (optional) default `false` +- `fuzzy_parse`: (optional) default `false` + + The following 2 parameters are used for loading in csv format: + +- `trim_double_quotes`: Boolean type (optional), the default value is `false`. True means that the outermost double quotes of each field in the csv file are trimmed. +- `skip_lines`: Integer type (optional), the default value is 0. It will skip some lines in the head of csv file. It will be disabled when the format is `csv_with_names` or `csv_with_names_and_types`. + +other kinds of parameters: + +- `path_partition_keys`: (optional) Specifies the column names carried in the file path. For example, if the file path is /path/to/city=beijing/date="2023-07-09", you should fill in `path_partition_keys="city,date"`. It will automatically read the corresponding column names and values from the path during load process. +- `resource`:(optional)Specify the resource name. Hdfs Tvf can use the existing Hdfs resource to directly access Hdfs. You can refer to the method for creating an Hdfs resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4. + +:::tip Tip +To directly query a TVF or create a VIEW based on that TVF, you need to have usage permission for that resource. To query a VIEW created based on TVF, you only need select permission for that VIEW. +::: + +### Examples + +Read and access csv format files on hdfs storage. + +```sql +MySQL [(none)]> select * from hdfs( + "uri" = "hdfs://127.0.0.1:842/user/doris/csv_format_test/student.csv", + "fs.defaultFS" = "hdfs://127.0.0.1:8424", + "hadoop.username" = "doris", + "format" = "csv"); ++------+---------+------+ +| c1 | c2 | c3 | ++------+---------+------+ +| 1 | alice | 18 | +| 2 | bob | 20 | +| 3 | jack | 24 | +| 4 | jackson | 19 | +| 5 | liming | 18 | ++------+---------+------+ +``` + +Read and access csv format files on hdfs storage in HA mode. 
+ +```sql +MySQL [(none)]> select * from hdfs( + "uri" = "hdfs://127.0.0.1:842/user/doris/csv_format_test/student.csv", + "fs.defaultFS" = "hdfs://127.0.0.1:8424", + "hadoop.username" = "doris", + "format" = "csv", + "dfs.nameservices" = "my_hdfs", + "dfs.ha.namenodes.my_hdfs" = "nn1,nn2", + "dfs.namenode.rpc-address.my_hdfs.nn1" = "nanmenode01:8020", + "dfs.namenode.rpc-address.my_hdfs.nn2" = "nanmenode02:8020", + "dfs.client.failover.proxy.provider.my_hdfs" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"); ++------+---------+------+ +| c1 | c2 | c3 | ++------+---------+------+ +| 1 | alice | 18 | +| 2 | bob | 20 | +| 3 | jack | 24 | +| 4 | jackson | 19 | +| 5 | liming | 18 | ++------+---------+------+ +``` + +Can be used with `desc function` : + +```sql +MySQL [(none)]> desc function hdfs( + "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student_with_names.csv", + "fs.defaultFS" = "hdfs://127.0.0.1:8424", + "hadoop.username" = "doris", + "format" = "csv_with_names"); +``` + +### Keywords + + hdfs, table-valued-function, tvf + +### Best Practice + + For more detailed usage of HDFS tvf, please refer to [S3](./s3.md) tvf, The only difference between them is the way of accessing the storage system. diff --git a/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md new file mode 100644 index 0000000000000..e6788a858fc90 --- /dev/null +++ b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md @@ -0,0 +1,99 @@ +--- +{ +"title": "ICEBERG_META", +"language": "en" +} +--- + + + +## iceberg_meta + +### Name + +iceberg_meta + +### description + +iceberg_meta table-valued-function(tvf), Use for read iceberg metadata, operation history, snapshots of table, file metadata etc. + +#### syntax + +```sql +iceberg_meta( + "table" = "ctl.db.tbl", + "query_type" = "snapshots" + ... + ); +``` + +**parameter description** + +Each parameter in iceberg_meta tvf is a pair of `"key"="value"`. + +Related parameters: +- `table`: (required) Use iceberg table name the format `catlog.database.table`. +- `query_type`: (required) The type of iceberg metadata. Only `snapshots` is currently supported. + +### Example + +Read and access the iceberg tabular metadata for snapshots. 
+ +```sql +select * from iceberg_meta("table" = "ctl.db.tbl", "query_type" = "snapshots"); + +``` + +Can be used with `desc function` : + +```sql +desc function iceberg_meta("table" = "ctl.db.tbl", "query_type" = "snapshots"); +``` + +### Keywords + + iceberg_meta, table-valued-function, tvf + +### Best Prac + +Inspect the iceberg table snapshots : + +```sql +select * from iceberg_meta("table" = "iceberg_ctl.test_db.test_tbl", "query_type" = "snapshots"); ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +| committed_at | snapshot_id | parent_id | operation | manifest_list | summary | ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +| 2022-09-20 11:14:29 | 64123452344 | -1 | append | hdfs:/path/to/m1 | {"flink.job-id":"xxm1", ...} | +| 2022-09-21 10:36:35 | 98865735822 | 64123452344 | overwrite | hdfs:/path/to/m2 | {"flink.job-id":"xxm2", ...} | +| 2022-09-21 21:44:11 | 51232845315 | 98865735822 | overwrite | hdfs:/path/to/m3 | {"flink.job-id":"xxm3", ...} | ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +``` + +Filtered by snapshot_id : + +```sql +select * from iceberg_meta("table" = "iceberg_ctl.test_db.test_tbl", "query_type" = "snapshots") +where snapshot_id = 98865735822; ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +| committed_at | snapshot_id | parent_id | operation | manifest_list | summary | ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +| 2022-09-21 10:36:35 | 98865735822 | 64123452344 | overwrite | hdfs:/path/to/m2 | {"flink.job-id":"xxm2", ...} | ++------------------------+----------------+---------------+-----------+-------------------+------------------------------+ +``` diff --git a/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/local.md b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/local.md new file mode 100644 index 0000000000000..4f39a8dae76ae --- /dev/null +++ b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/local.md @@ -0,0 +1,183 @@ +--- +{ + "title": "LOCAL", + "language": "en" +} +--- + + + +## local + +### Name + +local + +### Description + +Local table-valued-function(tvf), allows users to read and access local file contents on be node, just like accessing relational table. Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` file format. + +It needs `ADMIN` privilege to use. + +#### syntax + +```sql +local( + "file_path" = "path/to/file.txt", + "backend_id" = "be_id", + "format" = "csv", + "keyn" = "valuen" + ... + ); +``` + +**parameter description** + +- Related parameters for accessing local file on be node: + + - `file_path`: + + (required) The path of the file to be read, which is a relative path to the `user_files_secure_path` directory, where `user_files_secure_path` parameter [can be configured on be](../../../admin-manual/config/be-config.md). + + Can not contains `..` in path. Support using glob syntax to match multi files, such as `log/*.log` + +- Related to execution method: + + In versions prior to 2.1.1, Doris only supported specifying a BE node to read local data files on that node. + + - `backend_id`: + + The be id where the file is located. 
+
+    Starting from version 2.1.2, Doris supports a new parameter, `shared_storage`.
+
+    - `shared_storage`
+
+      Default is false. If true, the specified file exists on shared storage (such as NAS). The shared storage must support the POSIX file interface and be mounted on all BE nodes at the same time.
+
+      When `shared_storage` is true, you do not need to set `backend_id`; Doris may use any BE node for data access. If `backend_id` is set, the query is still executed only on the specified BE node.
+
+- File format parameters:
+
+    - `format`: (required) Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`
+    - `column_separator`: (optional) default `,`.
+    - `line_delimiter`: (optional) default `\n`.
+    - `compress_type`: (optional) Currently supports `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`. The default value is `UNKNOWN`; the type will be inferred automatically based on the file suffix.
+
+- The following parameters are used for loading in json format. For specific usage methods, please refer to: [Json Load](../../../data-operate/import/import-way/load-json-format.md)
+
+    - `read_json_by_line`: (optional) default `"true"`
+    - `strip_outer_array`: (optional) default `"false"`
+    - `json_root`: (optional) default `""`
+    - `json_paths`: (optional) default `""`
+    - `num_as_string`: (optional) default `false`
+    - `fuzzy_parse`: (optional) default `false`
+
+- The following parameters are used for loading in csv format (a combined sketch follows this list):
+
+    - `trim_double_quotes`: Boolean type (optional), the default value is `false`. True means that the outermost double quotes of each field in the csv file are trimmed.
+    - `skip_lines`: Integer type (optional), the default value is 0. It skips the specified number of lines at the head of the csv file. It will be disabled when the format is `csv_with_names` or `csv_with_names_and_types`.
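+
+The following is an illustration-only sketch combining the csv options above. The file name, `backend_id`, and file contents are hypothetical; the verified examples follow in the next section.
+
+```sql
+// A sketch: read a csv whose first line is a header comment to skip,
+// and whose fields are wrapped in double quotes (hypothetical file and BE id).
+select * from local(
+    "file_path" = "data/quoted.csv",
+    "backend_id" = "10003",
+    "format" = "csv",
+    "trim_double_quotes" = "true",
+    "skip_lines" = "1");
+```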
+
+### Examples
+
+Analyze the log file on a specified BE:
+
+```sql
+mysql> select * from local(
+        "file_path" = "log/be.out",
+        "backend_id" = "10006",
+        "format" = "csv")
+       where c1 like "%start_time%" limit 10;
++--------------------------------------------------------+
+| c1                                                     |
++--------------------------------------------------------+
+| start time: 2023年 08月 07日 星期一 23:20:32 CST       |
+| start time: 2023年 08月 07日 星期一 23:32:10 CST       |
+| start time: 2023年 08月 08日 星期二 00:20:50 CST       |
+| start time: 2023年 08月 08日 星期二 00:29:15 CST       |
++--------------------------------------------------------+
+```
+
+Read and access csv format files located at path `${DORIS_HOME}/student.csv`:
+
+```sql
+mysql> select * from local(
+        "file_path" = "student.csv",
+        "backend_id" = "10003",
+        "format" = "csv");
++------+---------+--------+
+| c1   | c2      | c3     |
++------+---------+--------+
+| 1    | alice   | 18     |
+| 2    | bob     | 20     |
+| 3    | jack    | 24     |
+| 4    | jackson | 19     |
+| 5    | liming  | 18     |
++------+---------+--------+
+```
+
+Query files on NAS:
+
+```sql
+mysql> select * from local(
+        "file_path" = "/mnt/doris/prefix_*.txt",
+        "format" = "csv",
+        "column_separator" = ",",
+        "shared_storage" = "true");
++------+------+------+
+| c1   | c2   | c3   |
++------+------+------+
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
++------+------+------+
+```
+
+Can be used with `desc function`:
+
+```sql
+mysql> desc function local(
+        "file_path" = "student.csv",
+        "backend_id" = "10003",
+        "format" = "csv");
++-------+------+------+-------+---------+-------+
+| Field | Type | Null | Key   | Default | Extra |
++-------+------+------+-------+---------+-------+
+| c1    | TEXT | Yes  | false | NULL    | NONE  |
+| c2    | TEXT | Yes  | false | NULL    | NONE  |
+| c3    | TEXT | Yes  | false | NULL    | NONE  |
++-------+------+------+-------+---------+-------+
+```
+
+### Keywords
+
+    local, table-valued-function, tvf
+
+### Best Practice
+
+- For more detailed usage of the local tvf, please refer to the [S3](./s3.md) tvf. The only difference between them is the way of accessing the storage system.
+
+- Access data on NAS through the local tvf
+
+  NAS shared storage can be mounted on multiple nodes at the same time, and each node can access files in the shared storage just like local files. Therefore, the NAS can be treated as a local file system and accessed through the local tvf.
+
+  When `"shared_storage" = "true"` is set, Doris assumes that the specified file can be accessed from any BE node. When a set of files is specified using wildcards, Doris distributes the file-access requests across multiple BE nodes, so that multiple nodes perform a distributed file scan, improving query performance.
diff --git a/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/partitions.md b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/partitions.md
new file mode 100644
index 0000000000000..7bda80d77e298
--- /dev/null
+++ b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/partitions.md
@@ -0,0 +1,130 @@
+---
+{
+  "title": "PARTITIONS",
+  "language": "en"
+}
+---
+
+
+
+## `partitions`
+
+### Name
+
+partitions
+
+### Description
+
+This table function generates a temporary partitions table, which allows you to view the partition list of a certain table.
+
+This function is used in the `FROM` clause.
+
+This function has been supported since version 2.1.5.
+
+#### Syntax
+
+`partitions("catalog"="","database"="","table"="")`
+
+`partitions()` table structure:
+```sql
+mysql> desc function partitions("catalog"="internal","database"="zd","table"="user");
++--------------------------+---------+------+-------+---------+-------+
+| Field                    | Type    | Null | Key   | Default | Extra |
++--------------------------+---------+------+-------+---------+-------+
+| PartitionId              | BIGINT  | No   | false | NULL    | NONE  |
+| PartitionName            | TEXT    | No   | false | NULL    | NONE  |
+| VisibleVersion           | BIGINT  | No   | false | NULL    | NONE  |
+| VisibleVersionTime       | TEXT    | No   | false | NULL    | NONE  |
+| State                    | TEXT    | No   | false | NULL    | NONE  |
+| PartitionKey             | TEXT    | No   | false | NULL    | NONE  |
+| Range                    | TEXT    | No   | false | NULL    | NONE  |
+| DistributionKey          | TEXT    | No   | false | NULL    | NONE  |
+| Buckets                  | INT     | No   | false | NULL    | NONE  |
+| ReplicationNum           | INT     | No   | false | NULL    | NONE  |
+| StorageMedium            | TEXT    | No   | false | NULL    | NONE  |
+| CooldownTime             | TEXT    | No   | false | NULL    | NONE  |
+| RemoteStoragePolicy      | TEXT    | No   | false | NULL    | NONE  |
+| LastConsistencyCheckTime | TEXT    | No   | false | NULL    | NONE  |
+| DataSize                 | TEXT    | No   | false | NULL    | NONE  |
+| IsInMemory               | BOOLEAN | No   | false | NULL    | NONE  |
+| ReplicaAllocation        | TEXT    | No   | false | NULL    | NONE  |
+| IsMutable                | BOOLEAN | No   | false | NULL    | NONE  |
+| SyncWithBaseTables       | BOOLEAN | No   | false | NULL    | NONE  |
+| UnsyncTables             | TEXT    | No   | false | NULL    | NONE  |
++--------------------------+---------+------+-------+---------+-------+
+20 rows in set (0.02 sec)
+```
+
+* PartitionId: partition id
+* PartitionName: partition name
+* VisibleVersion: visible version
+* VisibleVersionTime: visible version time
+* State: state
+* PartitionKey: partition key
+* Range: range
+* DistributionKey: distribution key
+* Buckets: bucket num
+* ReplicationNum: replication num
+* StorageMedium: storage medium
+* CooldownTime: cooldown time
+* RemoteStoragePolicy: remote storage policy
+* LastConsistencyCheckTime: last consistency check time
+* DataSize: data size
+* IsInMemory: is in memory
+* ReplicaAllocation: replica allocation
+* IsMutable: is mutable
+* SyncWithBaseTables: whether the partition is synchronized with the base table data (for partitions of asynchronous materialized views)
+* UnsyncTables: which base tables' data is out of sync (for partitions of asynchronous materialized views)
+
+```sql
+mysql> desc function partitions("catalog"="hive","database"="zdtest","table"="com2");
++-----------+------+------+-------+---------+-------+
+| Field     | Type | Null | Key   | Default | Extra |
++-----------+------+------+-------+---------+-------+
+| Partition | TEXT | No   | false | NULL    | NONE  |
++-----------+------+------+-------+---------+-------+
+1 row in set (0.11 sec)
+```
+
+* Partition: partition name
+
+### Example
+
+1. View the partition list of table1 under db1 in the internal catalog
+
+```sql
+mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1");
+```
+
+2. View the information of the partition named 'partition1' under table1
+
+```sql
+mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1";
+```
+
+3. View the partition id of the partition named 'partition1' under table1
+
+```sql
+mysql> select PartitionId from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1";
+```
+
+### Keywords
+
+    partitions
diff --git a/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/s3.md b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/s3.md
new file mode 100644
index 0000000000000..57a15bc13e55d
--- /dev/null
+++ b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/s3.md
@@ -0,0 +1,568 @@
+---
+{
+  "title": "S3",
+  "language": "en"
+}
+---
+
+
+
+## S3
+
+### Name
+
+S3
+
+### description
+
+The S3 table-valued-function (tvf) allows users to read and access file contents on S3-compatible object storage, just like accessing a relational table. It currently supports the `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` file formats.
+
+#### syntax
+
+```sql
+s3(
+  "uri" = "..",
+  "s3.access_key" = "...",
+  "s3.secret_key" = "...",
+  "s3.region" = "...",
+  "format" = "csv",
+  "keyn" = "valuen",
+  ...
+  );
+```
+
+**parameter description**
+
+Each parameter in the S3 tvf is a pair of `"key"="value"`.
+
+Related parameters for accessing S3:
+
+- `uri`: (required) The S3 tvf decides whether to use the path style access method according to the `use_path_style` parameter; the default access method is the virtual-hosted style.
+- `s3.access_key`: (required)
+- `s3.secret_key`: (required)
+- `s3.region`: (optional) Required if MinIO is configured with a non-default region. Otherwise, `us-east-1` is used by default.
+- `s3.session_token`: (optional)
+- `use_path_style`: (optional) default `false`. The S3 SDK uses the virtual-hosted style by default. However, some object storage systems may not enable or support virtual-hosted style access. In that case, the `use_path_style` parameter can be added to force the use of the path style access method.
+- `force_parsing_by_standard_uri`: (optional) default `false`. The `force_parsing_by_standard_uri` parameter can be added to force a non-standard uri to be parsed as a standard uri.
+
+> Note:
+> For AWS S3, standard uri styles are:
+>
+> 1. AWS Client Style (Hadoop S3 Style): `s3://my-bucket/path/to/file?versionId=abc123&partNumber=77&partNumber=88`
+> 2. Virtual Host Style: `https://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`
+> 3. Path Style: `https://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`
+>
+> In addition to the three standard uri styles above, some other uri styles are also supported (perhaps less common, but possible):
+>
+> 1. Virtual Host AWS Client (Hadoop S3) Mixed Style:
+>    `s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`
+> 2. Path AWS Client (Hadoop S3) Mixed Style:
+>    `s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`
+>
+> For detailed use cases, you can refer to Best Practice at the bottom.
+
+file format parameters:
+
+- `format`: (required) Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`
+- `column_separator`: (optional) default `\t`.
+- `line_delimiter`: (optional) default `\n`.
+- `compress_type`: (optional) Currently supports `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`. The default value is `UNKNOWN`; the type will be inferred automatically based on the suffix of `uri`.
+
+The following 6 parameters are used for loading in json format. For specific usage methods, please refer to: [Json Load](../../../data-operate/import/import-way/load-json-format.md)
+
+- `read_json_by_line`: (optional) default `"true"`
+- `strip_outer_array`: (optional) default `"false"`
+- `json_root`: (optional) default `""`
+- `jsonpaths`: (optional) default `""`
+- `num_as_string`: (optional) default `"false"`
+- `fuzzy_parse`: (optional) default `"false"`
+
+The following 2 parameters are used for loading in csv format:
+
+- `trim_double_quotes`: Boolean type (optional), the default value is `false`. True means that the outermost double quotes of each field in the csv file are trimmed.
+- `skip_lines`: Integer type (optional), the default value is 0. It skips the specified number of lines at the head of the csv file. It will be disabled when the format is `csv_with_names` or `csv_with_names_and_types`.
+
+other parameters:
+
+- `path_partition_keys`: (optional) Specifies the column names carried in the file path. For example, if the file path is /path/to/city=beijing/date="2023-07-09", you should fill in `path_partition_keys="city,date"`. The corresponding column names and values will be read automatically from the path during the load process.
+- `resource`: (optional) Specify the resource name. The S3 tvf can use an existing S3 resource to directly access S3. You can refer to the method for creating an S3 resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4.
+
+:::tip Tip
+To directly query a TVF or create a VIEW based on that TVF, you need to have usage permission for that resource. To query a VIEW created based on a TVF, you only need select permission for that VIEW.
+:::
+
+### Example
+
+Read and access csv format files on S3-compatible object storage.
+
+```sql
+select * from s3("uri" = "http://127.0.0.1:9312/test2/student1.csv",
+                "s3.access_key"= "minioadmin",
+                "s3.secret_key" = "minioadmin",
+                "format" = "csv",
+                "use_path_style" = "true") order by c1;
+```
+
+Can be used with `desc function`:
+
+```sql
+MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.csv",
+                "s3.access_key"= "minioadmin",
+                "s3.secret_key" = "minioadmin",
+                "format" = "csv",
+                "use_path_style" = "true");
+```
+
+### Keywords
+
+    s3, table-valued-function, tvf
+
+### Best Practice
+
+Since the S3 table-valued-function does not know the table schema in advance, it will read the file first to parse out the table schema.
+
+**Usage of different uri schemas**
+
+Examples of http:// and https://:
+
+```sql
+// Note how the bucket is written in the URI and how the 'use_path_style' parameter is set, together with the http:// scheme.
+// Because of "use_path_style"="true", s3 will be accessed in 'path style'.
+select * from s3(
+    "URI" = "https://endpoint/bucket/file/student.csv",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv",
+    "use_path_style"="true");
+
+// Note how the bucket is written in the URI and how the 'use_path_style' parameter is set, together with the http:// scheme.
+// Because of "use_path_style"="false", s3 will be accessed in 'virtual-hosted style'.
+select * from s3(
+    "URI" = "https://bucket.endpoint/file/student.csv",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv",
+    "use_path_style"="false");
+
+// OSS on Alibaba Cloud and COS on Tencent Cloud use 'virtual-hosted style' to access s3.
+
+// OSS
+select * from s3(
+    "URI" = "http://example-bucket.oss-cn-beijing.aliyuncs.com/your-folder/file.parquet",
+    "s3.access_key" = "ak",
+    "s3.secret_key" = "sk",
+    "s3.region" = "oss-cn-beijing",
+    "format" = "parquet",
+    "use_path_style" = "false");
+
+// COS
+select * from s3(
+    "URI" = "https://example-bucket.cos.ap-hongkong.myqcloud.com/your-folder/file.parquet",
+    "s3.access_key" = "ak",
+    "s3.secret_key" = "sk",
+    "s3.region" = "ap-hongkong",
+    "format" = "parquet",
+    "use_path_style" = "false");
+
+// BOS on Baidu Cloud uses a 'virtual-hosted style' compatible with the S3 protocol to access s3.
+// BOS
+select * from s3(
+    "uri" = "https://example-bucket.s3.bj.bcebos.com/your-folder/file.parquet",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "s3.region" = "bj",
+    "format" = "parquet",
+    "use_path_style" = "false");
+```
+
+Example of MinIO:
+
+```sql
+// MinIO
+select * from s3(
+    "uri" = "s3://bucket/file.csv",
+    "s3.endpoint" = "endpoint",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "s3.region" = "us-east-1",
+    "format" = "csv"
+);
+```
+
+Example of s3://:
+
+```sql
+// Note how the bucket is written in the URI; there is no need to set 'use_path_style'.
+// s3 will be accessed in 'virtual-hosted style'.
+select * from s3(
+    "URI" = "s3://bucket/file/student.csv",
+    "s3.endpoint"= "endpoint",
+    "s3.region" = "region",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv");
+```
+
+Example of other uri styles:
+
+```sql
+// Virtual Host AWS Client (Hadoop S3) Mixed Style. Used by setting `use_path_style = false` and `force_parsing_by_standard_uri = true`.
+select * from s3(
+    "URI" = "s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv",
+    "use_path_style"="false",
+    "force_parsing_by_standard_uri"="true");
+
+// Path AWS Client (Hadoop S3) Mixed Style. Used by setting `use_path_style = true` and `force_parsing_by_standard_uri = true`.
+select * from s3(
+    "URI" = "s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv",
+    "use_path_style"="true",
+    "force_parsing_by_standard_uri"="true");
+```
+
+**csv format**
+
+`csv` format: Read the file on S3 and process it as a csv file, reading the first line of the file to parse out the table schema.
The number of columns in the first line of the file `n` will be used as the number of columns in the table schema, and the column names of the table schema will be automatically named `c1, c2, ..., cn`, and the column type is set to `String` , for example: + +The file content of student1.csv: + +``` +1,ftw,12 +2,zs,18 +3,ww,20 +``` + +use S3 tvf + +```sql +MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv", +-> "use_path_style" = "true") order by c1; ++------+------+------+ +| c1 | c2 | c3 | ++------+------+------+ +| 1 | ftw | 12 | +| 2 | zs | 18 | +| 3 | ww | 20 | ++------+------+------+ +``` + +use `desc function S3()` to view the table schema + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv", +-> "use_path_style" = "true"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| c1 | TEXT | Yes | false | NULL | NONE | +| c2 | TEXT | Yes | false | NULL | NONE | +| c3 | TEXT | Yes | false | NULL | NONE | ++-------+------+------+-------+---------+-------+ +``` + +**csv_with_names format** +`csv_with_names` format: The first line of the file is used as the number and name of the columns of the table schema, and the column type is set to `String`, for example: + +The file content of student_with_names.csv: + +``` +id,name,age +1,ftw,12 +2,zs,18 +3,ww,20 +``` + +use S3 tvf + +```sql +MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student_with_names.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names", +-> "use_path_style" = "true") order by id; ++------+------+------+ +| id | name | age | ++------+------+------+ +| 1 | ftw | 12 | +| 2 | zs | 18 | +| 3 | ww | 20 | ++------+------+------+ +``` + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student_with_names.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names", +-> "use_path_style" = "true"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| id | TEXT | Yes | false | NULL | NONE | +| name | TEXT | Yes | false | NULL | NONE | +| age | TEXT | Yes | false | NULL | NONE | ++-------+------+------+-------+---------+-------+ +``` + +**csv_with_names_and_types format** + +`csv_with_names_and_types` format: Currently, it does not support parsing the column type from a csv file. When using this format, S3 tvf will parse the first line of the file as the number and name of the columns of the table schema, and set the column type to String. Meanwhile, the second line of the file is ignored. 
+ +The file content of student_with_names_and_types.csv: + +``` +id,name,age +INT,STRING,INT +1,ftw,12 +2,zs,18 +3,ww,20 +``` + +use S3 tvf + +```sql +MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student_with_names_and_types.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names_and_types", +-> "use_path_style" = "true") order by id; ++------+------+------+ +| id | name | age | ++------+------+------+ +| 1 | ftw | 12 | +| 2 | zs | 18 | +| 3 | ww | 20 | ++------+------+------+ +``` + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student_with_names_and_types.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names_and_types", +-> "use_path_style" = "true"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| id | TEXT | Yes | false | NULL | NONE | +| name | TEXT | Yes | false | NULL | NONE | +| age | TEXT | Yes | false | NULL | NONE | ++-------+------+------+-------+---------+-------+ +``` + +**json format** + +`json` format: The json format involves many optional parameters, and the meaning of each parameter can be referred to: [Json Load](../../../data-operate/import/import-way/load-json-format.md). When S3 tvf queries the json format file, it locates a json object according to the `json_root` and `jsonpaths` parameters, and uses the `key` in the object as the column name of the table schema, and sets the column type to String. For example: + +The file content of data.json: + +``` +[{"id":1, "name":"ftw", "age":18}] +[{"id":2, "name":"xxx", "age":17}] +[{"id":3, "name":"yyy", "age":19}] +``` + +use S3 tvf: + +```sql +MySQL [(none)]> select * from s3( + "URI" = "http://127.0.0.1:9312/test2/data.json", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "json", + "strip_outer_array" = "true", + "read_json_by_line" = "true", + "use_path_style"="true"); ++------+------+------+ +| id | name | age | ++------+------+------+ +| 1 | ftw | 18 | +| 2 | xxx | 17 | +| 3 | yyy | 19 | ++------+------+------+ + +MySQL [(none)]> select * from s3( + "URI" = "http://127.0.0.1:9312/test2/data.json", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "json", + "strip_outer_array" = "true", + "jsonpaths" = "[\"$.id\", \"$.age\"]", + "use_path_style"="true"); ++------+------+ +| id | age | ++------+------+ +| 1 | 18 | +| 2 | 17 | +| 3 | 19 | ++------+------+ +``` + +**parquet format** + +`parquet` format: S3 tvf supports parsing the column names and column types of the table schema from the parquet file. 
Example: + +```sql +MySQL [(none)]> select * from s3( + "URI" = "http://127.0.0.1:9312/test2/test.snappy.parquet", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "parquet", + "use_path_style"="true") limit 5; ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| p_partkey | p_name | p_mfgr | p_brand | p_type | p_size | p_container | p_retailprice | p_comment | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| 1 | goldenrod lavender spring chocolate lace | Manufacturer#1 | Brand#13 | PROMO BURNISHED COPPER | 7 | JUMBO PKG | 901 | ly. slyly ironi | +| 2 | blush thistle blue yellow saddle | Manufacturer#1 | Brand#13 | LARGE BRUSHED BRASS | 1 | LG CASE | 902 | lar accounts amo | +| 3 | spring green yellow purple cornsilk | Manufacturer#4 | Brand#42 | STANDARD POLISHED BRASS | 21 | WRAP CASE | 903 | egular deposits hag | +| 4 | cornflower chocolate smoke green pink | Manufacturer#3 | Brand#34 | SMALL PLATED BRASS | 14 | MED DRUM | 904 | p furiously r | +| 5 | forest brown coral puff cream | Manufacturer#3 | Brand#32 | STANDARD POLISHED TIN | 15 | SM PKG | 905 | wake carefully | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +``` + +```sql +MySQL [(none)]> desc function s3( + "URI" = "http://127.0.0.1:9312/test2/test.snappy.parquet", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "parquet", + "use_path_style"="true"); ++---------------+--------------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++---------------+--------------+------+-------+---------+-------+ +| p_partkey | INT | Yes | false | NULL | NONE | +| p_name | TEXT | Yes | false | NULL | NONE | +| p_mfgr | TEXT | Yes | false | NULL | NONE | +| p_brand | TEXT | Yes | false | NULL | NONE | +| p_type | TEXT | Yes | false | NULL | NONE | +| p_size | INT | Yes | false | NULL | NONE | +| p_container | TEXT | Yes | false | NULL | NONE | +| p_retailprice | DECIMAL(9,0) | Yes | false | NULL | NONE | +| p_comment | TEXT | Yes | false | NULL | NONE | ++---------------+--------------+------+-------+---------+-------+ +``` + +**orc format** + +`orc` format: Same as `parquet` format, set `format` parameter to orc. + +```sql +MySQL [(none)]> select * from s3( + "URI" = "http://127.0.0.1:9312/test2/test.snappy.orc", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "orc", + "use_path_style"="true") limit 5; ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| p_partkey | p_name | p_mfgr | p_brand | p_type | p_size | p_container | p_retailprice | p_comment | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +| 1 | goldenrod lavender spring chocolate lace | Manufacturer#1 | Brand#13 | PROMO BURNISHED COPPER | 7 | JUMBO PKG | 901 | ly. 
slyly ironi | +| 2 | blush thistle blue yellow saddle | Manufacturer#1 | Brand#13 | LARGE BRUSHED BRASS | 1 | LG CASE | 902 | lar accounts amo | +| 3 | spring green yellow purple cornsilk | Manufacturer#4 | Brand#42 | STANDARD POLISHED BRASS | 21 | WRAP CASE | 903 | egular deposits hag | +| 4 | cornflower chocolate smoke green pink | Manufacturer#3 | Brand#34 | SMALL PLATED BRASS | 14 | MED DRUM | 904 | p furiously r | +| 5 | forest brown coral puff cream | Manufacturer#3 | Brand#32 | STANDARD POLISHED TIN | 15 | SM PKG | 905 | wake carefully | ++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+ +``` + +**avro format** + +`avro` format: S3 tvf supports parsing the column names and column types of the table schema from the avro file. Example: + +```sql +select * from s3( + "uri" = "http://127.0.0.1:9312/test2/person.avro", + "ACCESS_KEY" = "ak", + "SECRET_KEY" = "sk", + "FORMAT" = "avro"); ++--------+--------------+-------------+-----------------+ +| name | boolean_type | double_type | long_type | ++--------+--------------+-------------+-----------------+ +| Alyssa | 1 | 10.0012 | 100000000221133 | +| Ben | 0 | 5555.999 | 4009990000 | +| lisi | 0 | 5992225.999 | 9099933330 | ++--------+--------------+-------------+-----------------+ +``` + +**uri contains wildcards** + +uri can use wildcards to read multiple files. Note: If wildcards are used, the format of each file must be consistent (especially csv/csv_with_names/csv_with_names_and_types count as different formats), S3 tvf uses the first file to parse out the table schema. For example: + +The following two csv files: + +``` +// file1.csv +1,aaa,18 +2,qqq,20 +3,qwe,19 + +// file2.csv +5,cyx,19 +6,ftw,21 +``` + +You can use wildcards on the uri to query. + +```sql +MySQL [(none)]> select * from s3( + "URI" = "http://127.0.0.1:9312/test2/file*.csv", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "csv", + "use_path_style"="true"); ++------+------+------+ +| c1 | c2 | c3 | ++------+------+------+ +| 1 | aaa | 18 | +| 2 | qqq | 20 | +| 3 | qwe | 19 | +| 5 | cyx | 19 | +| 6 | ftw | 21 | ++------+------+------+ +``` + +**Using `S3` tvf with `insert into` and `cast`** + +```sql +// Create doris internal table +CREATE TABLE IF NOT EXISTS ${testTable} + ( + id int, + name varchar(50), + age int + ) + COMMENT "my first table" + DISTRIBUTED BY HASH(id) BUCKETS 32 + PROPERTIES("replication_num" = "1"); + +// Insert data using S3 +insert into ${testTable} (id,name,age) +select cast (id as INT) as id, name, cast (age as INT) as age +from s3( + "uri" = "${uri}", + "s3.access_key"= "${ak}", + "s3.secret_key" = "${sk}", + "format" = "${format}", + "strip_outer_array" = "true", + "read_json_by_line" = "true", + "use_path_style" = "true"); +``` diff --git a/docs/sql-manual/sql-functions/table-functions/workload-group.md b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/workload-group.md similarity index 94% rename from docs/sql-manual/sql-functions/table-functions/workload-group.md rename to versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/workload-group.md index 0c588485a7f15..6953bf3de1a51 100644 --- a/docs/sql-manual/sql-functions/table-functions/workload-group.md +++ b/versioned_docs/version-2.0/sql-manual/sql-functions/table-valued-functions/workload-group.md @@ -31,7 +31,7 @@ under the License. 
workload_groups :::caution -Since 2.1.1, this table function has been moved to the information_schema.workload_groups. +Deprecated. Since 2.1.1, this table function has been moved to the information_schema.workload_groups. ::: ### description @@ -70,4 +70,4 @@ mysql> select * from workload_groups()\G ### keywords - workload_groups \ No newline at end of file + workload_groups diff --git a/versioned_docs/version-2.1/data-operate/import/load-json-format.md b/versioned_docs/version-2.1/data-operate/import/load-json-format.md index c108cbd4fbd8b..5166b12f673a8 100644 --- a/versioned_docs/version-2.1/data-operate/import/load-json-format.md +++ b/versioned_docs/version-2.1/data-operate/import/load-json-format.md @@ -32,7 +32,7 @@ Doris supports importing data in JSON format. This document mainly describes the Currently, only the following import methods support data import in JSON format: -- Through [S3 table function](../../sql-manual/sql-functions/table-functions/s3.md) import statement: insert into table select * from S3(); +- Through [S3 table function](../../sql-manual/sql-functions/table-valued-functions/s3.md) import statement: insert into table select * from S3(); - Import the local JSON format file through [STREAM LOAD](../../sql-manual/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md). - Subscribe and consume JSON format in Kafka via [ROUTINE LOAD](../../sql-manual/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md) information. diff --git a/versioned_docs/version-2.1/lakehouse/datalake-analytics/iceberg.md b/versioned_docs/version-2.1/lakehouse/datalake-analytics/iceberg.md index 97712ff3395d7..4d2f2f311a1e1 100644 --- a/versioned_docs/version-2.1/lakehouse/datalake-analytics/iceberg.md +++ b/versioned_docs/version-2.1/lakehouse/datalake-analytics/iceberg.md @@ -268,4 +268,4 @@ You can use the `FOR TIME AS OF` and `FOR VERSION AS OF` statements to read hist `SELECT * FROM iceberg_tbl FOR VERSION AS OF 868895038966572;` -In addition, you can use the [iceberg_meta](../../sql-manual/sql-functions/table-functions/iceberg-meta.md) table function to query the snapshot information of the specified table. +In addition, you can use the [iceberg_meta](../../sql-manual/sql-functions/table-valued-functions/iceberg-meta.md) table function to query the snapshot information of the specified table. diff --git a/versioned_docs/version-2.1/lakehouse/file.md b/versioned_docs/version-2.1/lakehouse/file.md index 47bb1ec0609ef..5694086386af1 100644 --- a/versioned_docs/version-2.1/lakehouse/file.md +++ b/versioned_docs/version-2.1/lakehouse/file.md @@ -24,17 +24,17 @@ specific language governing permissions and limitations under the License. --> - - With the Table Value Function feature, Doris is able to query files in object storage or HDFS as simply as querying Tables. In addition, it supports automatic column type inference. 
## Usage For more usage details, please see the documentation: -* [S3](https://doris.apache.org/docs/dev/sql-manual/sql-functions/table-functions/s3/): supports file analysis on object storage compatible with S3 +* [S3](../sql-manual/sql-functions/table-valued-functions/s3.md): supports file analysis on object storage compatible with S3 + +* [HDFS](../sql-manual/sql-functions/table-valued-functions/hdfs.md): supports file analysis on HDFS -* [HDFS](https://doris.apache.org/docs/dev/sql-manual/sql-functions/table-functions/hdfs/): supports file analysis on HDFS +* [LOCAL](../sql-manual/sql-functions/table-valued-functions/local.md): supports file analysis on local file system The followings illustrate how file analysis is conducted with the example of S3 Table Value Function. diff --git a/versioned_docs/version-2.1/query/view-materialized-view/async-materialized-view.md b/versioned_docs/version-2.1/query/view-materialized-view/async-materialized-view.md index a2566726f7367..f56733343f1a8 100644 --- a/versioned_docs/version-2.1/query/view-materialized-view/async-materialized-view.md +++ b/versioned_docs/version-2.1/query/view-materialized-view/async-materialized-view.md @@ -111,7 +111,7 @@ Specific syntax can be viewed [CREATE ASYNC MATERIALIZED VIEW](../../sql-manual/ select * from mv_infos("database"="tpch") where Name="mv1"; ``` -The unique features of materialized views can be viewed through [mv_infos()](../../sql-manual/sql-functions/table-functions/mv_infos.md) +The unique features of materialized views can be viewed through [mv_infos()](../../sql-manual/sql-functions/table-valued-functions/mv_infos.md) Properties related to table, still viewed through [SHOW TABLES](../../sql-manual/sql-statements/Show-Statements/SHOW-TABLES.md) @@ -142,7 +142,7 @@ Task is used to describe specific refresh information, such as the time used for select * from jobs("type"="mv") order by CreateTime; ``` -Specific syntax can be viewed [jobs("type"="mv")](../../sql-manual/sql-functions/table-functions/jobs.md) +Specific syntax can be viewed [jobs("type"="mv")](../../sql-manual/sql-functions/table-valued-functions/jobs.md) #### Pause materialized view job scheduled scheduling @@ -170,7 +170,7 @@ Specific syntax can be viewed [RESUME MATERIALIZED VIEW JOB](../../sql-manual/sq select * from tasks("type"="mv"); ``` -Specific syntax can be viewed [tasks("type"="mv")](../../sql-manual/sql-functions/table-functions/tasks.md) +Specific syntax can be viewed [tasks("type"="mv")](../../sql-manual/sql-functions/table-valued-functions/tasks.md) #### Cancel the task of objectifying the view @@ -438,7 +438,7 @@ The commonly used commands for `olapTable` are also applicable to materialized v The unique commands for materialized views mainly include the following: #### View materialized view metadata -[mv_infos()](../../sql-manual/sql-functions/table-functions/mv_infos) +[mv_infos()](../../sql-manual/sql-functions/table-valued-functions/mv_infos) Focus on the following fields: - State: If the state changes to SCHEMA_CHANGE, it means the schema of the base table has changed. In this case, the materialized view cannot be used for transparent rewriting (but direct querying of the materialized view is not affected). If the next refresh task is successful, the state will be restored to NORMAL. @@ -446,7 +446,7 @@ Focus on the following fields: - RefreshState: The status of the last refresh task of the materialized view. If it is FAIL, it means the execution failed, and further localization can be done through tasks(). 
- SyncWithBaseTables: Whether the materialized view is synchronized with the base table data. If not synchronized, further determination can be made by using show partitions to identify which partition is not synchronized.
 
#### View tasks for the materialized view
-[tasks("type"="mv")](../../sql-manual/sql-functions/table-functions/tasks.md)
+[tasks("type"="mv")](../../sql-manual/sql-functions/table-valued-functions/tasks.md)
 
Focus on the following fields:
- Status: If it is FAILED, it means the task execution failed. You can check the reason for failure through ErrorMsg. You can also search Doris logs using LastQueryId to get more detailed error information.
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-functions/hdfs.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-functions/hdfs.md
index 61c19aa0ff633..406aadccdad4a 100644
--- a/versioned_docs/version-2.1/sql-manual/sql-functions/table-functions/hdfs.md
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-functions/hdfs.md
@@ -94,6 +94,10 @@ other kinds of parameters:
- `path_partition_keys`: (optional) Specifies the column names carried in the file path. For example, if the file path is /path/to/city=beijing/date="2023-07-09", you should fill in `path_partition_keys="city,date"`. It will automatically read the corresponding column names and values from the path during load process.
- `resource`:(optional)Specify the resource name. Hdfs Tvf can use the existing Hdfs resource to directly access Hdfs. You can refer to the method for creating an Hdfs resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4 .
 
+:::tip Tip
+To directly query a TVF or create a VIEW based on that TVF, you need to have usage permission for that resource. To query a VIEW created based on TVF, you only need select permission for that VIEW.
+:::
+
### Examples
 
Read and access csv format files on hdfs storage.
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-functions/partitions.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-functions/partitions.md
new file mode 100644
index 0000000000000..d36471ead8603
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-functions/partitions.md
@@ -0,0 +1,128 @@
+---
+{
+  "title": "PARTITIONS",
+  "language": "en"
+}
+---
+
+
+
+## `partitions`
+
+### Name
+
+partitions
+
+### description
+
+This table function generates a temporary partitions table, which allows you to view the partition list of a certain table.
+
+This function is used in the `FROM` clause.
+
+#### syntax
+
+`partitions("catalog"="","database"="","table"="")`
+
+`partitions()` table structure:
+```sql
+mysql> desc function partitions("catalog"="internal","database"="zd","table"="user");
++--------------------------+---------+------+-------+---------+-------+
+| Field                    | Type    | Null | Key   | Default | Extra |
++--------------------------+---------+------+-------+---------+-------+
+| PartitionId              | BIGINT  | No   | false | NULL    | NONE  |
+| PartitionName            | TEXT    | No   | false | NULL    | NONE  |
+| VisibleVersion           | BIGINT  | No   | false | NULL    | NONE  |
+| VisibleVersionTime       | TEXT    | No   | false | NULL    | NONE  |
+| State                    | TEXT    | No   | false | NULL    | NONE  |
+| PartitionKey             | TEXT    | No   | false | NULL    | NONE  |
+| Range                    | TEXT    | No   | false | NULL    | NONE  |
+| DistributionKey          | TEXT    | No   | false | NULL    | NONE  |
+| Buckets                  | INT     | No   | false | NULL    | NONE  |
+| ReplicationNum           | INT     | No   | false | NULL    | NONE  |
+| StorageMedium            | TEXT    | No   | false | NULL    | NONE  |
+| CooldownTime             | TEXT    | No   | false | NULL    | NONE  |
+| RemoteStoragePolicy      | TEXT    | No   | false | NULL    | NONE  |
+| LastConsistencyCheckTime | TEXT    | No   | false | NULL    | NONE  |
+| DataSize                 | TEXT    | No   | false | NULL    | NONE  |
+| IsInMemory               | BOOLEAN | No   | false | NULL    | NONE  |
+| ReplicaAllocation        | TEXT    | No   | false | NULL    | NONE  |
+| IsMutable                | BOOLEAN | No   | false | NULL    | NONE  |
+| SyncWithBaseTables       | BOOLEAN | No   | false | NULL    | NONE  |
+| UnsyncTables             | TEXT    | No   | false | NULL    | NONE  |
++--------------------------+---------+------+-------+---------+-------+
+20 rows in set (0.02 sec)
+```
+
+* PartitionId: partition id
+* PartitionName: partition name
+* VisibleVersion: visible version
+* VisibleVersionTime: visible version time
+* State: state
+* PartitionKey: partition key
+* Range: range
+* DistributionKey: distribution key
+* Buckets: bucket num
+* ReplicationNum: replication num
+* StorageMedium: storage medium
+* CooldownTime: cooldown time
+* RemoteStoragePolicy: remote storage policy
+* LastConsistencyCheckTime: last consistency check time
+* DataSize: data size
+* IsInMemory: is in memory
+* ReplicaAllocation: replica allocation
+* IsMutable: is mutable
+* SyncWithBaseTables: whether the partition is synchronized with the base table data (for partitions of asynchronous materialized views)
+* UnsyncTables: which base tables' data is out of sync (for partitions of asynchronous materialized views)
+
+```sql
+mysql> desc function partitions("catalog"="hive","database"="zdtest","table"="com2");
++-----------+------+------+-------+---------+-------+
+| Field     | Type | Null | Key   | Default | Extra |
++-----------+------+------+-------+---------+-------+
+| Partition | TEXT | No   | false | NULL    | NONE  |
++-----------+------+------+-------+---------+-------+
+1 row in set (0.11 sec)
+```
+
+* Partition: partition name
+
+### example
+
+1. View the partition list of table1 under db1 in the internal catalog
+
+```sql
+mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1");
+```
+
+2. View the information of the partition named 'partition1' under table1
+
+```sql
+mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1";
+```
+
+3. View the partition id of the partition named 'partition1' under table1
+
+```sql
+mysql> select PartitionId from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1";
+```
+
+### keywords
+
+    partitions
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-functions/s3.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-functions/s3.md
index 05f62a13fd164..57a15bc13e55d 100644
--- a/versioned_docs/version-2.1/sql-manual/sql-functions/table-functions/s3.md
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-functions/s3.md
@@ -102,7 +102,11 @@ The following 2 parameters are used for loading in csv format
 
other parameter:
 
- `path_partition_keys`: (optional) Specifies the column names carried in the file path. For example, if the file path is /path/to/city=beijing/date="2023-07-09", you should fill in `path_partition_keys="city,date"`. It will automatically read the corresponding column names and values from the path during load process.
-- `resource`:(optional)Specify the resource name. S3 tvf can use the existing S3 resource to directly access S3. You can refer to the method for creating an S3 resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4.
+- `resource`:(optional)Specify the resource name. S3 tvf can use the existing S3 resource to directly access S3. You can refer to the method for creating an S3 resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4 .
+
+:::tip Tip
+To directly query a TVF or create a VIEW based on that TVF, you need to have usage permission for that resource. To query a VIEW created based on TVF, you only need select permission for that VIEW.
+:::
 
### Example
diff --git a/docs/sql-manual/sql-functions/table-functions/active_queries.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/active_queries.md
similarity index 97%
rename from docs/sql-manual/sql-functions/table-functions/active_queries.md
rename to versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/active_queries.md
index 52c8e6a0575be..79ee417dc4296 100644
--- a/docs/sql-manual/sql-functions/table-functions/active_queries.md
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/active_queries.md
@@ -31,7 +31,7 @@ under the License.
 
active_queries
 
:::caution
-Since 2.1.1, this table function has been moved to the information_schema.active_queries.
+Deprecated. Since 2.1.1, this table function has been moved to the information_schema.active_queries.
:::
 
### description
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/backends.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/backends.md
new file mode 100644
index 0000000000000..0f1476fb9fd2e
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/backends.md
@@ -0,0 +1,112 @@
+---
+{
+  "title": "BACKENDS",
+  "language": "en"
+}
+---
+
+
+
+## `backends`
+
+### Name
+
+backends
+
+### description
+
+Table-valued-function (tvf) that generates a temporary table named `backends`. This tvf is used to view the information of BE nodes in the Doris cluster.
+
+This function is used in `FROM` clauses.
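+
+As a quick, non-authoritative sketch of such a query (the full schema and a verified example follow below):
+
+```sql
+// A sketch: list only the live BE nodes, filtering on the BOOLEAN Alive column.
+select BackendId, Host from backends() where Alive = true;
+```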
+
+#### syntax
+
+`backends()`
+
+The table schema of the `backends()` tvf:
+```
+mysql> desc function backends();
++-------------------------+---------+------+-------+---------+-------+
+| Field                   | Type    | Null | Key   | Default | Extra |
++-------------------------+---------+------+-------+---------+-------+
+| BackendId               | BIGINT  | No   | false | NULL    | NONE  |
+| Host                    | TEXT    | No   | false | NULL    | NONE  |
+| HeartbeatPort           | INT     | No   | false | NULL    | NONE  |
+| BePort                  | INT     | No   | false | NULL    | NONE  |
+| HttpPort                | INT     | No   | false | NULL    | NONE  |
+| BrpcPort                | INT     | No   | false | NULL    | NONE  |
+| LastStartTime           | TEXT    | No   | false | NULL    | NONE  |
+| LastHeartbeat           | TEXT    | No   | false | NULL    | NONE  |
+| Alive                   | BOOLEAN | No   | false | NULL    | NONE  |
+| SystemDecommissioned    | BOOLEAN | No   | false | NULL    | NONE  |
+| TabletNum               | BIGINT  | No   | false | NULL    | NONE  |
+| DataUsedCapacity        | BIGINT  | No   | false | NULL    | NONE  |
+| AvailCapacity           | BIGINT  | No   | false | NULL    | NONE  |
+| TotalCapacity           | BIGINT  | No   | false | NULL    | NONE  |
+| UsedPct                 | DOUBLE  | No   | false | NULL    | NONE  |
+| MaxDiskUsedPct          | DOUBLE  | No   | false | NULL    | NONE  |
+| RemoteUsedCapacity      | BIGINT  | No   | false | NULL    | NONE  |
+| Tag                     | TEXT    | No   | false | NULL    | NONE  |
+| ErrMsg                  | TEXT    | No   | false | NULL    | NONE  |
+| Version                 | TEXT    | No   | false | NULL    | NONE  |
+| Status                  | TEXT    | No   | false | NULL    | NONE  |
+| HeartbeatFailureCounter | INT     | No   | false | NULL    | NONE  |
+| NodeRole                | TEXT    | No   | false | NULL    | NONE  |
++-------------------------+---------+------+-------+---------+-------+
+23 rows in set (0.002 sec)
+```
+
+The information displayed by the `backends` tvf is basically consistent with the information displayed by the `show backends` statement. However, the types of each field in the `backends` tvf are more specific, and you can use the `backends` tvf to perform operations such as filtering and joining.
+
+The information displayed by the `backends` tvf is authenticated, consistent with the behavior of `show backends`: the user must have the ADMIN/OPERATOR privilege.
+
+### example
+```
+mysql> select * from backends()\G
+*************************** 1. row ***************************
+              BackendId: 10002
+                   Host: 10.xx.xx.90
+          HeartbeatPort: 9053
+                 BePort: 9063
+               HttpPort: 8043
+               BrpcPort: 8069
+          LastStartTime: 2023-06-15 16:51:02
+          LastHeartbeat: 2023-06-15 17:09:58
+                  Alive: 1
+   SystemDecommissioned: 0
+              TabletNum: 21
+       DataUsedCapacity: 0
+          AvailCapacity: 5187141550081
+          TotalCapacity: 7750977622016
+                UsedPct: 33.077583202570978
+         MaxDiskUsedPct: 33.077583202583881
+     RemoteUsedCapacity: 0
+                    Tag: {"location" : "default"}
+                 ErrMsg:
+                Version: doris-0.0.0-trunk-4b18cde0c7
+                 Status: {"lastSuccessReportTabletsTime":"2023-06-15 17:09:02","lastStreamLoadTime":-1,"isQueryDisabled":false,"isLoadDisabled":false}
+HeartbeatFailureCounter: 0
+               NodeRole: mix
+1 row in set (0.038 sec)
+```
+
+### keywords
+
+    backends
\ No newline at end of file
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/catalogs.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/catalogs.md
new file mode 100644
index 0000000000000..e748297da7ff0
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/catalogs.md
@@ -0,0 +1,91 @@
+---
+{
+  "title": "CATALOGS",
+  "language": "en"
+}
+---
+
+
+
+## `catalogs`
+
+### Name
+
+catalogs
+
+### description
+
+The table function generates a temporary table of catalogs, which allows you to view the information of the catalogs created in the current Doris cluster.
+
+This function is used in the `FROM` clause.
+
+#### syntax
+
+`catalogs()`
+
+`catalogs()` table structure:
+```
+mysql> desc function catalogs();
++-------------+--------+------+-------+---------+-------+
+| Field       | Type   | Null | Key   | Default | Extra |
++-------------+--------+------+-------+---------+-------+
+| CatalogId   | BIGINT | No   | false | NULL    | NONE  |
+| CatalogName | TEXT   | No   | false | NULL    | NONE  |
+| CatalogType | TEXT   | No   | false | NULL    | NONE  |
+| Property    | TEXT   | No   | false | NULL    | NONE  |
+| Value       | TEXT   | No   | false | NULL    | NONE  |
++-------------+--------+------+-------+---------+-------+
+5 rows in set (0.04 sec)
+```
+
+The information presented by the `catalogs()` tvf is the result of synthesizing the `show catalogs` and `show catalog xxx` statements.
+
+The table generated by the tvf can be used for filtering, joining, and other operations.
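+
+For instance, a hedged sketch of such a filter (the catalog names and type values here are hypothetical):
+
+```sql
+// A sketch: list only Hive Metastore catalogs by filtering on CatalogType.
+select distinct CatalogName from catalogs() where CatalogType = "hms";
+```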
+
+
+### example
+
+```
+mysql> select * from catalogs();
++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+
+| CatalogId | CatalogName | CatalogType | Property                                   | Value                                                                     |
++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+
+|     16725 | hive        | hms         | dfs.client.failover.proxy.provider.HANN    | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider |
+|     16725 | hive        | hms         | dfs.ha.namenodes.HANN                      | nn1,nn2                                                                   |
+|     16725 | hive        | hms         | create_time                                | 2023-07-13 16:24:38.968                                                   |
+|     16725 | hive        | hms         | ipc.client.fallback-to-simple-auth-allowed | true                                                                      |
+|     16725 | hive        | hms         | dfs.namenode.rpc-address.HANN.nn1          | nn1_host:rpc_port                                                         |
+|     16725 | hive        | hms         | hive.metastore.uris                        | thrift://127.0.0.1:7004                                                   |
+|     16725 | hive        | hms         | dfs.namenode.rpc-address.HANN.nn2          | nn2_host:rpc_port                                                         |
+|     16725 | hive        | hms         | type                                       | hms                                                                       |
+|     16725 | hive        | hms         | dfs.nameservices                           | HANN                                                                      |
+|         0 | internal    | internal    | NULL                                       | NULL                                                                      |
+|     16726 | es          | es          | create_time                                | 2023-07-13 16:24:44.922                                                   |
+|     16726 | es          | es          | type                                       | es                                                                        |
+|     16726 | es          | es          | hosts                                      | http://127.0.0.1:9200                                                     |
++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+
+13 rows in set (0.01 sec)
+```
+
+### keywords
+
+    catalogs
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/frontends.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/frontends.md
new file mode 100644
index 0000000000000..e66ec1200a790
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/frontends.md
@@ -0,0 +1,102 @@
+---
+{
+  "title": "FRONTENDS",
+  "language": "en"
+}
+---
+
+
+
+## `frontends`
+
+### Name
+
+frontends
+
+### description
+
+Table-valued-function (tvf) that generates a temporary table named `frontends`. This tvf is used to view the information of FE nodes in the Doris cluster.
+
+This function is used in `FROM` clauses.
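+
+A brief, illustration-only sketch (IsMaster is a TEXT column per the schema below, so it is compared against a string):
+
+```sql
+// A sketch: find the current master FE node.
+select Name, Host from frontends() where IsMaster = "true";
+```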
+
+#### syntax
+
+`frontends()`
+
+The table schema of the `frontends()` tvf:
+```
+mysql> desc function frontends();
++--------------------+------+------+-------+---------+-------+
+| Field              | Type | Null | Key   | Default | Extra |
++--------------------+------+------+-------+---------+-------+
+| Name               | TEXT | No   | false | NULL    | NONE  |
+| Host               | TEXT | No   | false | NULL    | NONE  |
+| EditLogPort        | TEXT | No   | false | NULL    | NONE  |
+| HttpPort           | TEXT | No   | false | NULL    | NONE  |
+| QueryPort          | TEXT | No   | false | NULL    | NONE  |
+| RpcPort            | TEXT | No   | false | NULL    | NONE  |
+| ArrowFlightSqlPort | TEXT | No   | false | NULL    | NONE  |
+| Role               | TEXT | No   | false | NULL    | NONE  |
+| IsMaster           | TEXT | No   | false | NULL    | NONE  |
+| ClusterId          | TEXT | No   | false | NULL    | NONE  |
+| Join               | TEXT | No   | false | NULL    | NONE  |
+| Alive              | TEXT | No   | false | NULL    | NONE  |
+| ReplayedJournalId  | TEXT | No   | false | NULL    | NONE  |
+| LastHeartbeat      | TEXT | No   | false | NULL    | NONE  |
+| IsHelper           | TEXT | No   | false | NULL    | NONE  |
+| ErrMsg             | TEXT | No   | false | NULL    | NONE  |
+| Version            | TEXT | No   | false | NULL    | NONE  |
+| CurrentConnected   | TEXT | No   | false | NULL    | NONE  |
++--------------------+------+------+-------+---------+-------+
+18 rows in set (0.022 sec)
+```
+
+The information displayed by the `frontends` tvf is basically consistent with the information displayed by the `show frontends` statement. However, the types of each field in the `frontends` tvf are more specific, and you can use the `frontends` tvf to perform operations such as filtering and joining.
+
+The information displayed by the `frontends` tvf is authenticated, consistent with the behavior of `show frontends`: the user must have the ADMIN/OPERATOR privilege.
+
+### example
+```
+mysql> select * from frontends()\G
+*************************** 1. row ***************************
+              Name: fe_5fa8bf19_fd6b_45cb_89c5_25a5ebc45582
+                IP: 10.xx.xx.14
+       EditLogPort: 9013
+          HttpPort: 8034
+         QueryPort: 9033
+           RpcPort: 9023
+ArrowFlightSqlPort: 9040
+              Role: FOLLOWER
+          IsMaster: true
+         ClusterId: 1258341841
+              Join: true
+             Alive: true
+ ReplayedJournalId: 186
+     LastHeartbeat: 2023-06-15 16:53:12
+          IsHelper: true
+            ErrMsg:
+           Version: doris-0.0.0-trunk-4b18cde0c7
+  CurrentConnected: Yes
+1 row in set (0.060 sec)
+```
+
+### keywords
+
+    frontends
\ No newline at end of file
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/frontends_disks.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/frontends_disks.md
new file mode 100644
index 0000000000000..0532fc477ebac
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/frontends_disks.md
@@ -0,0 +1,87 @@
+---
+{
+  "title": "FRONTENDS_DISKS",
+  "language": "en"
+}
+---
+
+
+
+## `frontends_disks`
+
+### Name
+
+frontends_disks
+
+### description
+
+Table-valued-function (tvf) that generates a temporary table named `frontends_disks`. This tvf is used to view the disk information of FE nodes in the Doris cluster.
+
+This function is used in `FROM` clauses.
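+
+For illustration only (DirType and UseRate are TEXT columns per the schema below; the 'log' value matches the example further down):
+
+```sql
+// A sketch: check the usage of each FE node's log directory.
+select Name, Dir, UseRate from frontends_disks() where DirType = "log";
+```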
+
+#### syntax
+
+`frontends_disks()`
+
+The table schema of the `frontends_disks()` tvf:
+```
+mysql> desc function frontends_disks();
++------------+------+------+-------+---------+-------+
+| Field      | Type | Null | Key   | Default | Extra |
++------------+------+------+-------+---------+-------+
+| Name       | TEXT | No   | false | NULL    | NONE  |
+| Host       | TEXT | No   | false | NULL    | NONE  |
+| DirType    | TEXT | No   | false | NULL    | NONE  |
+| Dir        | TEXT | No   | false | NULL    | NONE  |
+| Filesystem | TEXT | No   | false | NULL    | NONE  |
+| Capacity   | TEXT | No   | false | NULL    | NONE  |
+| Used       | TEXT | No   | false | NULL    | NONE  |
+| Available  | TEXT | No   | false | NULL    | NONE  |
+| UseRate    | TEXT | No   | false | NULL    | NONE  |
+| MountOn    | TEXT | No   | false | NULL    | NONE  |
++------------+------+------+-------+---------+-------+
+10 rows in set (0.14 sec)
+```
+
+The information displayed by the `frontends_disks` tvf is basically consistent with the information displayed by the `show frontends disks` statement. However, the types of each field in the `frontends_disks` tvf are more specific, and you can use the `frontends_disks` tvf to perform operations such as filtering and joining.
+
+The information displayed by the `frontends_disks` tvf is authenticated, consistent with the behavior of `show frontends disks`: the user must have the ADMIN/OPERATOR privilege.
+
+### example
+```
+mysql> select * from frontends_disks()\G
+*************************** 1. row ***************************
+       Name: fe_fe1d5bd9_d1e5_4ccc_9b03_ca79b95c9941
+       Host: 172.XX.XX.1
+    DirType: log
+        Dir: /data/doris/fe-github/log
+ Filesystem: /dev/sdc5
+   Capacity: 366G
+       Used: 119G
+  Available: 228G
+    UseRate: 35%
+    MountOn: /data
+......
+12 rows in set (0.03 sec)
+```
+
+### keywords
+
+    frontends_disks
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/hdfs.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/hdfs.md
new file mode 100644
index 0000000000000..7748a302ab48b
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/hdfs.md
@@ -0,0 +1,162 @@
+---
+{
+  "title": "HDFS",
+  "language": "en"
+}
+---
+
+
+
+## HDFS
+
+### Name
+
+hdfs
+
+### Description
+
+The HDFS table-valued-function (tvf) allows users to read and access file contents on HDFS storage, just like accessing a relational table. It currently supports the `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` file formats.
+
+#### syntax
+
+```sql
+hdfs(
+  "uri" = "..",
+  "fs.defaultFS" = "...",
+  "hadoop.username" = "...",
+  "format" = "csv",
+  "keyn" = "valuen"
+  ...
+  );
+```
+
+**parameter description**
+
+Related parameters for accessing hdfs:
+
+- `uri`: (required) hdfs uri. If the uri path does not exist or the files are empty files, the hdfs tvf will return an empty result set.
+- `fs.defaultFS`: (required)
+- `hadoop.username`: (required) Can be any string, but cannot be empty.
+- `hadoop.security.authentication`: (optional)
+- `hadoop.kerberos.principal`: (optional)
+- `hadoop.kerberos.keytab`: (optional)
+- `dfs.client.read.shortcircuit`: (optional)
+- `dfs.domain.socket.path`: (optional)
+
+Related parameters for accessing HDFS in HA mode:
+
+- `dfs.nameservices`: (optional)
+- `dfs.ha.namenodes.your-nameservices`: (optional)
+- `dfs.namenode.rpc-address.your-nameservices.your-namenode`: (optional)
+- `dfs.client.failover.proxy.provider.your-nameservices`: (optional)
+
+File format parameters:
+
+- `format`: (required) Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc/avro`
+- `column_separator`: (optional) default `\t`.
+- `line_delimiter`: (optional) default `\n`.
+- `compress_type`: (optional) Currently supports `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`. The default value is `UNKNOWN`; the type is inferred automatically from the suffix of `uri`.
+
+  The following 6 parameters are used for loading in json format. For specific usage methods, please refer to: [Json Load](../../../data-operate/import/import-way/load-json-format.md)
+
+- `read_json_by_line`: (optional) default `"true"`
+- `strip_outer_array`: (optional) default `"false"`
+- `json_root`: (optional) default `""`
+- `json_paths`: (optional) default `""`
+- `num_as_string`: (optional) default `"false"`
+- `fuzzy_parse`: (optional) default `"false"`
+
+  The following 2 parameters are used for loading in csv format:
+
+- `trim_double_quotes`: Boolean type (optional), the default value is `false`. True means that the outermost double quotes of each field in the csv file are trimmed.
+- `skip_lines`: Integer type (optional), the default value is 0. It skips the first few lines at the head of the csv file. It is disabled when the format is `csv_with_names` or `csv_with_names_and_types`.
+
+Other parameters:
+
+- `path_partition_keys`: (optional) Specifies the column names carried in the file path. For example, if the file path is /path/to/city=beijing/date="2023-07-09", you should fill in `path_partition_keys="city,date"`. The corresponding column names and values will be read automatically from the path during the load process.
+- `resource`: (optional) Specify the resource name. The hdfs tvf can use an existing hdfs resource to access hdfs directly. You can refer to the method for creating an hdfs resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4.
+
+:::tip Tip
+To directly query a TVF or create a VIEW based on that TVF, you need to have usage permission for that resource. To query a VIEW created based on TVF, you only need select permission for that VIEW.
+:::
+
+### Examples
+
+Read and access csv format files on hdfs storage.
+
+```sql
+MySQL [(none)]> select * from hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student.csv",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "csv");
++------+---------+------+
+| c1   | c2      | c3   |
++------+---------+------+
+| 1    | alice   | 18   |
+| 2    | bob     | 20   |
+| 3    | jack    | 24   |
+| 4    | jackson | 19   |
+| 5    | liming  | 18   |
++------+---------+------+
+```
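+
+Reading files in other formats works the same way; only the `format` value changes. For example, for parquet (a hypothetical sketch: the file path is a placeholder, reusing the cluster settings from the example above):
+
+```sql
+-- Read a parquet file; column names and types are taken from the parquet metadata.
+MySQL [(none)]> select * from hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/parquet_format_test/test.parquet",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "parquet");
+```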
+
+Read and access csv format files on hdfs storage in HA mode.
+
+```sql
+MySQL [(none)]> select * from hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student.csv",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "csv",
+            "dfs.nameservices" = "my_hdfs",
+            "dfs.ha.namenodes.my_hdfs" = "nn1,nn2",
+            "dfs.namenode.rpc-address.my_hdfs.nn1" = "namenode01:8020",
+            "dfs.namenode.rpc-address.my_hdfs.nn2" = "namenode02:8020",
+            "dfs.client.failover.proxy.provider.my_hdfs" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
++------+---------+------+
+| c1   | c2      | c3   |
++------+---------+------+
+| 1    | alice   | 18   |
+| 2    | bob     | 20   |
+| 3    | jack    | 24   |
+| 4    | jackson | 19   |
+| 5    | liming  | 18   |
++------+---------+------+
+```
+
+Can be used with `desc function`:
+
+```sql
+MySQL [(none)]> desc function hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student_with_names.csv",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "csv_with_names");
+```
+
+### Keywords
+
+    hdfs, table-valued-function, tvf
+
+### Best Practice
+
+  For more detailed usage of the hdfs tvf, please refer to the [S3](./s3.md) tvf. The only difference between them is the way they access the storage system.
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md
new file mode 100644
index 0000000000000..e6788a858fc90
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md
@@ -0,0 +1,99 @@
+---
+{
+"title": "ICEBERG_META",
+"language": "en"
+}
+---
+
+
+
+## iceberg_meta
+
+### Name
+
+iceberg_meta
+
+### description
+
+iceberg_meta table-valued-function (tvf), used for reading iceberg metadata: operation history, table snapshots, file metadata, and so on.
+
+#### syntax
+
+```sql
+iceberg_meta(
+    "table" = "ctl.db.tbl",
+    "query_type" = "snapshots"
+    ...
+    );
+```
+
+**parameter description**
+
+Each parameter in the iceberg_meta tvf is a pair of `"key"="value"`.
+
+Related parameters:
+- `table`: (required) The iceberg table name, in the format `catalog.database.table`.
+- `query_type`: (required) The type of iceberg metadata. Only `snapshots` is currently supported.
+
+### Example
+
+Read and access the iceberg snapshot metadata.
+
+```sql
+select * from iceberg_meta("table" = "ctl.db.tbl", "query_type" = "snapshots");
+```
+
+Can be used with `desc function`:
+
+```sql
+desc function iceberg_meta("table" = "ctl.db.tbl", "query_type" = "snapshots");
+```
+
+### Keywords
+
+    iceberg_meta, table-valued-function, tvf
+
+### Best Practice
+
+Inspect the iceberg table snapshots:
+
+```sql
+select * from iceberg_meta("table" = "iceberg_ctl.test_db.test_tbl", "query_type" = "snapshots");
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+| committed_at           | snapshot_id    | parent_id     | operation | manifest_list     | summary                      |
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+| 2022-09-20 11:14:29    | 64123452344    | -1            | append    | hdfs:/path/to/m1  | {"flink.job-id":"xxm1", ...} |
+| 2022-09-21 10:36:35    | 98865735822    | 64123452344   | overwrite | hdfs:/path/to/m2  | {"flink.job-id":"xxm2", ...} |
+| 2022-09-21 21:44:11    | 51232845315    | 98865735822   | overwrite | hdfs:/path/to/m3  | {"flink.job-id":"xxm3", ...} |
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+```
+
+Filter by snapshot_id:
+
+```sql
+select * from iceberg_meta("table" = "iceberg_ctl.test_db.test_tbl", "query_type" = "snapshots")
+where snapshot_id = 98865735822;
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+| committed_at           | snapshot_id    | parent_id     | operation | manifest_list     | summary                      |
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+| 2022-09-21 10:36:35    | 98865735822    | 64123452344   | overwrite | hdfs:/path/to/m2  | {"flink.job-id":"xxm2", ...} |
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+```
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/jobs.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/jobs.md
new file mode 100644
index 0000000000000..3bc7276e08e3e
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/jobs.md
@@ -0,0 +1,150 @@
+---
+{
+    "title": "JOBS",
+    "language": "en"
+}
+---
+
+
+
+## `jobs`
+
+### Name
+
+jobs
+
+### description
+
+Table function that generates a temporary jobs table, allowing you to view the job information for a given job type.
+
+This function is used in the `FROM` clause.
+
+This function is supported since 2.1.0.
+
+#### syntax
+
+`jobs("type"="")`
+
+**parameter description**
+
+| parameter | description | type   | required |
+|:----------|:------------|:-------|:---------|
+| type      | job type    | string | yes      |
+
+Supported **type** values:
+- insert: insert into type job
+- mv: materialized view job
+
+##### insert job
+
+The table schema of `jobs("type"="insert")`:
+```
+mysql> desc function jobs("type"="insert");
++-------------------+------+------+-------+---------+-------+
+| Field             | Type | Null | Key   | Default | Extra |
++-------------------+------+------+-------+---------+-------+
+| Id                | TEXT | No   | false | NULL    | NONE  |
+| Name              | TEXT | No   | false | NULL    | NONE  |
+| Definer           | TEXT | No   | false | NULL    | NONE  |
+| ExecuteType       | TEXT | No   | false | NULL    | NONE  |
+| RecurringStrategy | TEXT | No   | false | NULL    | NONE  |
+| Status            | TEXT | No   | false | NULL    | NONE  |
+| ExecuteSql        | TEXT | No   | false | NULL    | NONE  |
+| CreateTime        | TEXT | No   | false | NULL    | NONE  |
+| SucceedTaskCount  | TEXT | No   | false | NULL    | NONE  |
+| FailedTaskCount   | TEXT | No   | false | NULL    | NONE  |
+| CanceledTaskCount | TEXT | No   | false | NULL    | NONE  |
+| Comment           | TEXT | No   | false | NULL    | NONE  |
++-------------------+------+------+-------+---------+-------+
+12 rows in set (0.01 sec)
+```
+* Id: job ID.
+* Name: job name.
+* Definer: job definer.
+* ExecuteType: execution type
+* RecurringStrategy: recurring strategy
+* Status: job status
+* ExecuteSql: executed SQL
+* CreateTime: job creation time
+* SucceedTaskCount: number of successful tasks
+* FailedTaskCount: number of failed tasks
+* CanceledTaskCount: number of canceled tasks
+* Comment: job comment
+
+##### materialized view job
+
+The table schema of `jobs("type"="mv")`:
+```sql
+mysql> desc function jobs("type"="mv");
++-------------------+------+------+-------+---------+-------+
+| Field             | Type | Null | Key   | Default | Extra |
++-------------------+------+------+-------+---------+-------+
+| Id                | TEXT | No   | false | NULL    | NONE  |
+| Name              | TEXT | No   | false | NULL    | NONE  |
+| MvId              | TEXT | No   | false | NULL    | NONE  |
+| MvName            | TEXT | No   | false | NULL    | NONE  |
+| MvDatabaseId      | TEXT | No   | false | NULL    | NONE  |
+| MvDatabaseName    | TEXT | No   | false | NULL    | NONE  |
+| ExecuteType       | TEXT | No   | false | NULL    | NONE  |
+| RecurringStrategy | TEXT | No   | false | NULL    | NONE  |
+| Status            | TEXT | No   | false | NULL    | NONE  |
+| CreateTime        | TEXT | No   | false | NULL    | NONE  |
++-------------------+------+------+-------+---------+-------+
+10 rows in set (0.00 sec)
+```
+
+* Id: job ID.
+* Name: job name.
+* MvId: materialized view ID
+* MvName: materialized view name
+* MvDatabaseId: ID of the database to which the materialized view belongs
+* MvDatabaseName: name of the database to which the materialized view belongs
+* ExecuteType: execution type
+* RecurringStrategy: recurring strategy
+* Status: job status
+* CreateTime: job creation time
+
+### example
+
+1. View jobs of all materialized views
+
+```sql
+mysql> select * from jobs("type"="mv");
+```
+
+2. View the job with name `inner_mtmv_75043`
+
+```sql
+mysql> select * from jobs("type"="mv") where Name="inner_mtmv_75043";
+```
+
+3. View all insert jobs
+
+```sql
+mysql> select * from jobs("type"="insert");
+```
+4. View the job with name `one_insert_job`
+
+```sql
+mysql> select * from jobs("type"="insert") where Name='one_insert_job';
+```
+
+### keywords
+
+    jobs, job, insert, mv, materialized view, schedule
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/local.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/local.md
new file mode 100644
index 0000000000000..4f39a8dae76ae
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/local.md
@@ -0,0 +1,183 @@
+---
+{
+    "title": "LOCAL",
+    "language": "en"
+}
+---
+
+
+
+## local
+
+### Name
+
+local
+
+### Description
+
+Local table-valued-function (tvf), allows users to read and access local file contents on a BE node, just like accessing a relational table. Currently supports the `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` file formats.
+
+Using it requires the `ADMIN` privilege.
+
+#### syntax
+
+```sql
+local(
+  "file_path" = "path/to/file.txt",
+  "backend_id" = "be_id",
+  "format" = "csv",
+  "keyn" = "valuen"
+  ...
+  );
+```
+
+**parameter description**
+
+- Related parameters for accessing local files on a BE node:
+
+  - `file_path`:
+
+    (required) The path of the file to be read, relative to the `user_files_secure_path` directory; the `user_files_secure_path` parameter [can be configured on the BE](../../../admin-manual/config/be-config.md).
+
+    The path cannot contain `..`. Glob syntax is supported to match multiple files, such as `log/*.log`.
+
+- Related to the execution method:
+
+  In versions prior to 2.1.1, Doris only supported specifying a BE node to read local data files on that node.
+
+  - `backend_id`:
+
+    The id of the BE where the file is located. `backend_id` can be obtained through the `show backends` command.
+
+  Starting from version 2.1.2, Doris adds a new parameter `shared_storage`.
+
+  - `shared_storage`
+
+    Default is false. If true, the specified file exists on shared storage (such as NAS). The shared storage must be compatible with the POSIX file interface and mounted on all BE nodes at the same time.
+
+    When `shared_storage` is true, you do not need to set `backend_id`, and Doris may use any BE node for data access. If `backend_id` is set, the query still executes only on the specified BE node.
+
+- File format parameters:
+
+  - `format`: (required) Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`
+  - `column_separator`: (optional) default `,`.
+  - `line_delimiter`: (optional) default `\n`.
+  - `compress_type`: (optional) Currently supports `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`. The default value is `UNKNOWN`; the type is inferred automatically from the suffix of `uri`.
+
+- The following parameters are used for loading in json format. For specific usage methods, please refer to: [Json Load](../../../data-operate/import/import-way/load-json-format.md)
+
+  - `read_json_by_line`: (optional) default `"true"`
+  - `strip_outer_array`: (optional) default `"false"`
+  - `json_root`: (optional) default `""`
+  - `json_paths`: (optional) default `""`
+  - `num_as_string`: (optional) default `"false"`
+  - `fuzzy_parse`: (optional) default `"false"`
+
+- The following parameters are used for loading in csv format:
+
+  - `trim_double_quotes`: Boolean type (optional), the default value is `false`. True means that the outermost double quotes of each field in the csv file are trimmed.
+  - `skip_lines`: Integer type (optional), the default value is 0. It skips the first few lines at the head of the csv file. It is disabled when the format is `csv_with_names` or `csv_with_names_and_types`.
+
+### Examples
+
+Analyze the log file on a specified BE:
+
+```sql
+mysql> select * from local(
+        "file_path" = "log/be.out",
+        "backend_id" = "10006",
+        "format" = "csv")
+       where c1 like "%start_time%" limit 10;
++--------------------------------------------------------+
+| c1                                                     |
++--------------------------------------------------------+
+| start time: 2023年 08月 07日 星期一 23:20:32 CST       |
+| start time: 2023年 08月 07日 星期一 23:32:10 CST       |
+| start time: 2023年 08月 08日 星期二 00:20:50 CST       |
+| start time: 2023年 08月 08日 星期二 00:29:15 CST       |
++--------------------------------------------------------+
+```
+
+Read and access csv format files located at path `${DORIS_HOME}/student.csv`:
+
+```sql
+mysql> select * from local(
+        "file_path" = "student.csv",
+        "backend_id" = "10003",
+        "format" = "csv");
++------+---------+--------+
+| c1   | c2      | c3     |
++------+---------+--------+
+| 1    | alice   | 18     |
+| 2    | bob     | 20     |
+| 3    | jack    | 24     |
+| 4    | jackson | 19     |
+| 5    | liming  | d18    |
++------+---------+--------+
+```
+
+Query files on NAS:
+
+```sql
+mysql> select * from local(
+        "file_path" = "/mnt/doris/prefix_*.txt",
+        "format" = "csv",
+        "column_separator" = ",",
+        "shared_storage" = "true");
++------+------+------+
+| c1   | c2   | c3   |
++------+------+------+
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
++------+------+------+
+```
+
+Can be used with `desc function`:
+
+```sql
+mysql> desc function local(
+        "file_path" = "student.csv",
+        "backend_id" = "10003",
+        "format" = "csv");
++-------+------+------+-------+---------+-------+
+| Field | Type | Null | Key   | Default | Extra |
++-------+------+------+-------+---------+-------+
+| c1    | TEXT | Yes  | false | NULL    | NONE  |
+| c2    | TEXT | Yes  | false | NULL    | NONE  |
+| c3    | TEXT | Yes  | false | NULL    | NONE  |
++-------+------+------+-------+---------+-------+
```
+
+### Keywords
+
+local, table-valued-function, tvf
+
+### Best Practice
+
+- For more detailed usage of the local tvf, please refer to the [S3](./s3.md) tvf. The only difference between them is the way they access the storage system.
+
+- Access data on NAS through the local tvf
+
+  NAS shared storage can be mounted on multiple nodes at the same time, and each node can access files in the shared storage just like local files. Therefore, the NAS can be treated as a local file system and accessed through the local tvf.
+
+  When `"shared_storage" = "true"` is set, Doris assumes that the specified file can be accessed from any BE node. When a set of files is specified using wildcards, Doris distributes the file-access requests across multiple BE nodes, so that multiple nodes perform a distributed file scan, improving query performance.
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/mv_infos.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/mv_infos.md
new file mode 100644
index 0000000000000..e3938ace5e680
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/mv_infos.md
@@ -0,0 +1,102 @@
+---
+{
+    "title": "MV_INFOS",
+    "language": "en"
+}
+---
+
+
+
+## `mv_infos`
+
+### Name
+
+mv_infos
+
+### description
+
+Table function that generates a temporary table of asynchronous materialized views, allowing you to view information about the asynchronous materialized views created in a given database.
+
+This function is used in the `FROM` clause.
+
+This function is supported since 2.1.0.
+
+#### syntax
+
+`mv_infos("database"="")`
+
+The table schema of `mv_infos()`:
+```sql
+mysql> desc function mv_infos("database"="tpch100");
++--------------------+---------+------+-------+---------+-------+
+| Field              | Type    | Null | Key   | Default | Extra |
++--------------------+---------+------+-------+---------+-------+
+| Id                 | BIGINT  | No   | false | NULL    | NONE  |
+| Name               | TEXT    | No   | false | NULL    | NONE  |
+| JobName            | TEXT    | No   | false | NULL    | NONE  |
+| State              | TEXT    | No   | false | NULL    | NONE  |
+| SchemaChangeDetail | TEXT    | No   | false | NULL    | NONE  |
+| RefreshState       | TEXT    | No   | false | NULL    | NONE  |
+| RefreshInfo        | TEXT    | No   | false | NULL    | NONE  |
+| QuerySql           | TEXT    | No   | false | NULL    | NONE  |
+| EnvInfo            | TEXT    | No   | false | NULL    | NONE  |
+| MvProperties       | TEXT    | No   | false | NULL    | NONE  |
+| MvPartitionInfo    | TEXT    | No   | false | NULL    | NONE  |
+| SyncWithBaseTables | BOOLEAN | No   | false | NULL    | NONE  |
++--------------------+---------+------+-------+---------+-------+
+12 rows in set (0.01 sec)
+```
+
+* Id: materialized view ID
+* Name: materialized view name
+* JobName: the name of the job corresponding to the materialized view
+* State: materialized view state
+* SchemaChangeDetail: the reason why the materialized view State became SCHEMA_CHANGE
+* RefreshState: materialized view refresh state
+* RefreshInfo: the refresh strategy information defined for the materialized view
+* QuerySql: the query statement defined for the materialized view
+* EnvInfo: environment information when the materialized view was created
+* MvProperties: materialized view properties
+* MvPartitionInfo: partition information of the materialized view
+* SyncWithBaseTables: whether the view is synchronized with the base table data. To see which partition is not synchronized, use [SHOW PARTITIONS](../../sql-statements/Show-Statements/SHOW-PARTITIONS.md)
+
+### example
+
+1. View all materialized views under db1
+
+```sql
+mysql> select * from mv_infos("database"="db1");
+```
+
+2. View the materialized view named mv1 under db1
+
+```sql
+mysql> select * from mv_infos("database"="db1") where Name = "mv1";
+```
+
+3. View the state of the materialized view named mv1 under db1
+
+```sql
+mysql> select State from mv_infos("database"="db1") where Name = "mv1";
+```
+
+### keywords
+
+    mv, infos
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/partitions.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/partitions.md
new file mode 100644
index 0000000000000..7bda80d77e298
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/partitions.md
@@ -0,0 +1,130 @@
+---
+{
+    "title": "PARTITIONS",
+    "language": "en"
+}
+---
+
+
+
+## `partitions`
+
+### Name
+
+partitions
+
+### Description
+
+Table function that generates a temporary partitions table, allowing you to view the partition list of a given table.
+
+This function is used in the `FROM` clause.
+
+This function is supported since 2.1.5.
+
+#### Syntax
+
+`partitions("catalog"="","database"="","table"="")`
+
+The table schema of `partitions()`:
+```sql
+mysql> desc function partitions("catalog"="internal","database"="zd","table"="user");
++--------------------------+---------+------+-------+---------+-------+
+| Field                    | Type    | Null | Key   | Default | Extra |
++--------------------------+---------+------+-------+---------+-------+
+| PartitionId              | BIGINT  | No   | false | NULL    | NONE  |
+| PartitionName            | TEXT    | No   | false | NULL    | NONE  |
+| VisibleVersion           | BIGINT  | No   | false | NULL    | NONE  |
+| VisibleVersionTime       | TEXT    | No   | false | NULL    | NONE  |
+| State                    | TEXT    | No   | false | NULL    | NONE  |
+| PartitionKey             | TEXT    | No   | false | NULL    | NONE  |
+| Range                    | TEXT    | No   | false | NULL    | NONE  |
+| DistributionKey          | TEXT    | No   | false | NULL    | NONE  |
+| Buckets                  | INT     | No   | false | NULL    | NONE  |
+| ReplicationNum           | INT     | No   | false | NULL    | NONE  |
+| StorageMedium            | TEXT    | No   | false | NULL    | NONE  |
+| CooldownTime             | TEXT    | No   | false | NULL    | NONE  |
+| RemoteStoragePolicy      | TEXT    | No   | false | NULL    | NONE  |
+| LastConsistencyCheckTime | TEXT    | No   | false | NULL    | NONE  |
+| DataSize                 | TEXT    | No   | false | NULL    | NONE  |
+| IsInMemory               | BOOLEAN | No   | false | NULL    | NONE  |
+| ReplicaAllocation        | TEXT    | No   | false | NULL    | NONE  |
+| IsMutable                | BOOLEAN | No   | false | NULL    | NONE  |
+| SyncWithBaseTables       | BOOLEAN | No   | false | NULL    | NONE  |
+| UnsyncTables             | TEXT    | No   | false | NULL    | NONE  |
++--------------------------+---------+------+-------+---------+-------+
+20 rows in set (0.02 sec)
+```
+
+* PartitionId: partition ID
+* PartitionName: partition name
+* VisibleVersion: visible version
+* VisibleVersionTime: visible version time
+* State: state
+* PartitionKey: partition key
+* Range: range
+* DistributionKey: distribution key
+* Buckets: number of buckets
+* ReplicationNum: number of replicas
+* StorageMedium: storage medium
+* CooldownTime: cooldown time
+* RemoteStoragePolicy: remote storage policy
+* LastConsistencyCheckTime: last consistency check time
+* DataSize: data size
+* IsInMemory: whether it is in memory
+* ReplicaAllocation: replica allocation
+* IsMutable: whether it is mutable
+* SyncWithBaseTables: whether the partition is synchronized with the base table data (for partitions of asynchronous materialized views)
+* UnsyncTables: the base tables whose data is not synchronized (for partitions of asynchronous materialized views)
+
+For tables in external catalogs, such as hive, the schema is:
+
+```sql
+mysql> desc function partitions("catalog"="hive","database"="zdtest","table"="com2");
++-----------+------+------+-------+---------+-------+
+| Field     | Type | Null | Key   | Default | Extra |
++-----------+------+------+-------+---------+-------+
+| Partition | TEXT | No   | false | NULL    | NONE  |
++-----------+------+------+-------+---------+-------+
+1 row in set (0.11 sec)
+```
+
+* Partition: partition name
+
+### Example
+
+1. View the partition list of table1 under db1 in the internal catalog
+
+```sql
+mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1");
+```
+
+2. View the information of the partition named partition1 under table1
+
+```sql
+mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1";
+```
+
+3. View the ID of the partition named partition1 under table1
+
+```sql
+mysql> select PartitionId from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1";
+```
+
+### Keywords
+
+    partitions
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/query.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/query.md
new file mode 100644
index 0000000000000..f3f0adfd5f2a3
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/query.md
@@ -0,0 +1,110 @@
+---
+{
+    "title": "QUERY",
+    "language": "en"
+}
+---
+
+
+
+## query
+
+### Name
+
+query
+
+### description
+
+The query table function (table-valued-function, tvf) passes a query statement directly through to a catalog for execution.
+
+:::info note
+Supported since Doris 2.1.3. Currently, only pass-through queries to a jdbc catalog are supported.
+You need to create the corresponding catalog in Doris first.
+:::
+
+#### syntax
+
+```sql
+query(
+  "catalog" = "catalog_name",
+  "query" = "select * from db_name.table_name where condition"
+  );
+```
+
+**Parameter Description**
+
+Each parameter in the query tvf is a `"key"="value"` pair.
+Related parameters:
+- `catalog`: (required) The name of the catalog to query.
+- `query`: (required) The query statement to be executed.
+
+### Example
+
+Use the query function to query tables in a jdbc data source:
+
+```sql
+select * from query("catalog" = "jdbc", "query" = "select * from db_name.table_name where condition");
+```
+
+Can be used with `desc function`:
+
+```sql
+desc function query("catalog" = "jdbc", "query" = "select * from db_name.table_name where condition");
+```
+
+### Keywords
+
+    query, table-valued-function, tvf
+
+### Best Practice
+
+Pass-through query of tables in a jdbc catalog data source:
+
+```sql
+select * from query("catalog" = "jdbc", "query" = "select * from test.student");
++------+---------+
+| id   | name    |
++------+---------+
+| 1    | alice   |
+| 2    | bob     |
+| 3    | jack    |
++------+---------+
+select * from query("catalog" = "jdbc", "query" = "select * from test.score");
++------+---------+
+| id   | score   |
++------+---------+
+| 1    | 100     |
+| 2    | 90      |
+| 3    | 80      |
++------+---------+
+```
+
+Pass-through join query of tables in a jdbc catalog data source:
+
+```sql
+select * from query("catalog" = "jdbc", "query" = "select a.id, a.name, b.score from test.student a join test.score b on a.id = b.id");
++------+---------+---------+
+| id   | name    | score   |
++------+---------+---------+
+| 1    | alice   | 100     |
+| 2    | bob     | 90      |
+| 3    | jack    | 80      |
++------+---------+---------+
+```
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/s3.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/s3.md
new file mode 100644
index 0000000000000..57a15bc13e55d
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/s3.md
@@ -0,0 +1,568 @@
+---
+{
+    "title": "S3",
+    "language": "en"
+}
+---
+
+
+
+## S3
+
+### Name
+
+S3
+
+### description
+
+S3 table-valued-function (tvf), allows users to read and access file contents on S3-compatible object storage, just like accessing a relational table. Currently supports the `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` file formats.
+
+#### syntax
+
+```sql
+s3(
+  "uri" = "..",
+  "s3.access_key" = "...",
+  "s3.secret_key" = "...",
+  "s3.region" = "...",
+  "format" = "csv",
+  "keyn" = "valuen",
+  ...
+  );
+```
+
+**parameter description**
+
+Each parameter in the S3 tvf is a pair of `"key"="value"`.
+
+Related parameters for accessing S3:
+
+- `uri`: (required) The S3 tvf decides whether to use the path style access method according to the `use_path_style` parameter; the default access method is the virtual-hosted style.
+- `s3.access_key`: (required)
+- `s3.secret_key`: (required)
+- `s3.region`: (optional) Required if MinIO is configured with a non-default region; otherwise, `us-east-1` is used by default.
+- `s3.session_token`: (optional)
+- `use_path_style`: (optional) default `false`. The S3 SDK uses the virtual-hosted style by default. However, some object storage systems may not enable or support virtual-hosted style access. In that case, add the `use_path_style` parameter to force path style access.
+- `force_parsing_by_standard_uri`: (optional) default `false`. Add the `force_parsing_by_standard_uri` parameter to force a non-standard uri to be parsed as a standard uri.
+
+> Note:
+> For AWS S3, standard uri styles should be:
+>
+> 1. AWS Client Style(Hadoop S3 Style): `s3://my-bucket/path/to/file?versionId=abc123&partNumber=77&partNumber=88`
+> 2. Virtual Host Style: `https://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`
+> 3. Path Style: `https://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`
+>
+> In addition to the three standard uri styles above, some other uri styles are also supported (less common, but possible):
+>
+> 1. Virtual Host AWS Client (Hadoop S3) Mixed Style:
+>    `s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`
+> 2. Path AWS Client (Hadoop S3) Mixed Style:
+>    `s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88`
+>
+> For detailed use cases, you can refer to Best Practice at the bottom.
+
+File format parameters:
+
+- `format`: (required) Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`
+- `column_separator`: (optional) default `\t`.
+- `line_delimiter`: (optional) default `\n`.
+- `compress_type`: (optional) Currently supports `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`. The default value is `UNKNOWN`; the type is inferred automatically from the suffix of `uri`.
+
+The following 6 parameters are used for loading in json format. For specific usage methods, please refer to: [Json Load](../../../data-operate/import/import-way/load-json-format.md)
+
+- `read_json_by_line`: (optional) default `"true"`
+- `strip_outer_array`: (optional) default `"false"`
+- `json_root`: (optional) default `""`
+- `jsonpaths`: (optional) default `""`
+- `num_as_string`: (optional) default `"false"`
+- `fuzzy_parse`: (optional) default `"false"`
+
+The following 2 parameters are used for loading in csv format:
+
+- `trim_double_quotes`: Boolean type (optional), the default value is `false`. True means that the outermost double quotes of each field in the csv file are trimmed.
+- `skip_lines`: Integer type (optional), the default value is 0. It skips the first few lines at the head of the csv file. It is disabled when the format is `csv_with_names` or `csv_with_names_and_types`.
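+
+For instance, the two csv parameters above can be combined when a file begins with banner lines and uses quoted fields (a hypothetical sketch: the file name is a placeholder, and the credentials mirror the MinIO examples below):
+
+```sql
+-- Skip the first 2 banner lines, then trim the outer double quotes of each field.
+select * from s3(
+    "uri" = "http://127.0.0.1:9312/test2/quoted_with_banner.csv",
+    "s3.access_key" = "minioadmin",
+    "s3.secret_key" = "minioadmin",
+    "format" = "csv",
+    "trim_double_quotes" = "true",
+    "skip_lines" = "2",
+    "use_path_style" = "true");
+```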
+
+Other parameters:
+
+- `path_partition_keys`: (optional) Specifies the column names carried in the file path. For example, if the file path is /path/to/city=beijing/date="2023-07-09", you should fill in `path_partition_keys="city,date"`. The corresponding column names and values will be read automatically from the path during the load process.
+- `resource`: (optional) Specify the resource name. The S3 tvf can use an existing S3 resource to access S3 directly. You can refer to the method for creating an S3 resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4.
+
+:::tip Tip
+To directly query a TVF or create a VIEW based on that TVF, you need to have usage permission for that resource. To query a VIEW created based on TVF, you only need select permission for that VIEW.
+:::
+
+### Example
+
+Read and access csv format files on S3-compatible object storage.
+
+```sql
+select * from s3("uri" = "http://127.0.0.1:9312/test2/student1.csv",
+                "s3.access_key"= "minioadmin",
+                "s3.secret_key" = "minioadmin",
+                "format" = "csv",
+                "use_path_style" = "true") order by c1;
+```
+
+Can be used with `desc function`:
+
+```sql
+MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.csv",
+                 "s3.access_key"= "minioadmin",
+                 "s3.secret_key" = "minioadmin",
+                 "format" = "csv",
+                 "use_path_style" = "true");
+```
+
+### Keywords
+
+    s3, table-valued-function, tvf
+
+### Best Practice
+
+Since the S3 table-valued-function does not know the table schema in advance, it reads the file first to parse out the table schema.
+
+**Usage of different uri schemas**
+
+Examples of http:// and https://:
+
+```sql
+// Note how the bucket is written in the URI and how the 'use_path_style' parameter is set.
+// Because of "use_path_style"="true", s3 will be accessed in 'path style'.
+select * from s3(
+    "URI" = "https://endpoint/bucket/file/student.csv",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv",
+    "use_path_style"="true");
+
+// Note how the bucket is written in the URI and how the 'use_path_style' parameter is set.
+// Because of "use_path_style"="false", s3 will be accessed in 'virtual-hosted style'.
+select * from s3(
+    "URI" = "https://bucket.endpoint/file/student.csv",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv",
+    "use_path_style"="false");
+
+// OSS on Alibaba Cloud and COS on Tencent Cloud use 'virtual-hosted style' to access s3.
+// OSS
+select * from s3(
+    "URI" = "http://example-bucket.oss-cn-beijing.aliyuncs.com/your-folder/file.parquet",
+    "s3.access_key" = "ak",
+    "s3.secret_key" = "sk",
+    "s3.region" = "oss-cn-beijing",
+    "format" = "parquet",
+    "use_path_style" = "false");
+// COS
+select * from s3(
+    "URI" = "https://example-bucket.cos.ap-hongkong.myqcloud.com/your-folder/file.parquet",
+    "s3.access_key" = "ak",
+    "s3.secret_key" = "sk",
+    "s3.region" = "ap-hongkong",
+    "format" = "parquet",
+    "use_path_style" = "false");
+
+// BOS on Baidu Cloud uses a 'virtual-hosted style' compatible with the S3 protocol to access s3.
+// BOS
+select * from s3(
+    "uri" = "https://example-bucket.s3.bj.bcebos.com/your-folder/file.parquet",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "s3.region" = "bj",
+    "format" = "parquet",
+    "use_path_style" = "false");
+```
+
+```sql
+// MinIO
+select * from s3(
+    "uri" = "s3://bucket/file.csv",
+    "s3.endpoint" = "",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "s3.region" = "us-east-1",
+    "format" = "csv"
+);
+```
+
+Example of s3://:
+
+```sql
+// Note how the bucket is written in the URI; there is no need to set 'use_path_style'.
+// s3 will be accessed in 'virtual-hosted style'.
+select * from s3(
+    "URI" = "s3://bucket/file/student.csv",
+    "s3.endpoint"= "endpoint",
+    "s3.region" = "region",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv");
+```
+
+Examples of other uri styles:
+
+```sql
+// Virtual Host AWS Client (Hadoop S3) Mixed Style. Used by setting `use_path_style = false` and `force_parsing_by_standard_uri = true`.
+select * from s3(
+    "URI" = "s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv",
+    "use_path_style"="false",
+    "force_parsing_by_standard_uri"="true");
+
+// Path AWS Client (Hadoop S3) Mixed Style. Used by setting `use_path_style = true` and `force_parsing_by_standard_uri = true`.
+select * from s3(
+    "URI" = "s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv",
+    "use_path_style"="true",
+    "force_parsing_by_standard_uri"="true");
+```
+
+**csv format**
+
+`csv` format: The file on S3 is read and processed as a csv file; the first line of the file is read to parse out the table schema. The number of columns `n` in the first line determines the number of columns in the table schema; the column names are automatically generated as `c1, c2, ..., cn`, and every column type is set to `String`. For example:
+
+The file content of student1.csv:
+
+```
+1,ftw,12
+2,zs,18
+3,ww,20
+```
+
+Use the S3 tvf:
+
+```sql
+MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student1.csv",
+->                 "s3.access_key"= "minioadmin",
+->                 "s3.secret_key" = "minioadmin",
+->                 "format" = "csv",
+->                 "use_path_style" = "true") order by c1;
++------+------+------+
+| c1   | c2   | c3   |
++------+------+------+
+| 1    | ftw  | 12   |
+| 2    | zs   | 18   |
+| 3    | ww   | 20   |
++------+------+------+
+```
+
+Use `desc function s3()` to view the table schema:
+
+```sql
+MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.csv",
+->                 "s3.access_key"= "minioadmin",
+->                 "s3.secret_key" = "minioadmin",
+->                 "format" = "csv",
+->                 "use_path_style" = "true");
++-------+------+------+-------+---------+-------+
+| Field | Type | Null | Key   | Default | Extra |
++-------+------+------+-------+---------+-------+
+| c1    | TEXT | Yes  | false | NULL    | NONE  |
+| c2    | TEXT | Yes  | false | NULL    | NONE  |
+| c3    | TEXT | Yes  | false | NULL    | NONE  |
++-------+------+------+-------+---------+-------+
+```
+
+**csv_with_names format**
+
+`csv_with_names` format: The first line of the file supplies the number and names of the columns of the table schema, and all column types are set to `String`. For example:
+
+The file content of student_with_names.csv:
+
+```
+id,name,age
+1,ftw,12
+2,zs,18
+3,ww,20
+```
+
+Use the S3 tvf:
+
+```sql
+MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student_with_names.csv",
+->                 "s3.access_key"= "minioadmin",
+->                 "s3.secret_key" = "minioadmin",
+->                 "format" = "csv_with_names",
+->                 "use_path_style" = "true") order by id;
++------+------+------+
+| id   | name | age  |
++------+------+------+
+| 1    | ftw  | 12   |
+| 2    | zs   | 18   |
+| 3    | ww   | 20   |
++------+------+------+
+```
+
+```sql
+MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student_with_names.csv",
+->                 "s3.access_key"= "minioadmin",
+->                 "s3.secret_key" = "minioadmin",
+->                 "format" = "csv_with_names",
+->                 "use_path_style" = "true");
++-------+------+------+-------+---------+-------+
+| Field | Type | Null | Key   | Default | Extra |
++-------+------+------+-------+---------+-------+
+| id    | TEXT | Yes  | false | NULL    | NONE  |
+| name  | TEXT | Yes  | false | NULL    | NONE  |
+| age   | TEXT | Yes  | false | NULL    | NONE  |
++-------+------+------+-------+---------+-------+
+```
+
+**csv_with_names_and_types format**
+
+`csv_with_names_and_types` format: Parsing column types from a csv file is currently not supported. When using this format, the S3 tvf parses the first line of the file as the number and names of the columns of the table schema and sets every column type to `String`; the second line of the file is ignored.
+
+The file content of student_with_names_and_types.csv:
+
+```
+id,name,age
+INT,STRING,INT
+1,ftw,12
+2,zs,18
+3,ww,20
+```
+
+Use the S3 tvf:
+
+```sql
+MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student_with_names_and_types.csv",
+->                 "s3.access_key"= "minioadmin",
+->                 "s3.secret_key" = "minioadmin",
+->                 "format" = "csv_with_names_and_types",
+->                 "use_path_style" = "true") order by id;
++------+------+------+
+| id   | name | age  |
++------+------+------+
+| 1    | ftw  | 12   |
+| 2    | zs   | 18   |
+| 3    | ww   | 20   |
++------+------+------+
+```
+
+```sql
+MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student_with_names_and_types.csv",
+->                 "s3.access_key"= "minioadmin",
+->                 "s3.secret_key" = "minioadmin",
+->                 "format" = "csv_with_names_and_types",
+->                 "use_path_style" = "true");
++-------+------+------+-------+---------+-------+
+| Field | Type | Null | Key   | Default | Extra |
++-------+------+------+-------+---------+-------+
+| id    | TEXT | Yes  | false | NULL    | NONE  |
+| name  | TEXT | Yes  | false | NULL    | NONE  |
+| age   | TEXT | Yes  | false | NULL    | NONE  |
++-------+------+------+-------+---------+-------+
+```
+
+**json format**
+
+`json` format: The json format involves many optional parameters, and the meaning of each parameter can be referred to: [Json Load](../../../data-operate/import/import-way/load-json-format.md). When the S3 tvf queries a json format file, it locates a json object according to the `json_root` and `jsonpaths` parameters and uses the keys in the object as the column names of the table schema, setting every column type to `String`. For example:
+
+The file content of data.json:
+
+```
+[{"id":1, "name":"ftw", "age":18}]
+[{"id":2, "name":"xxx", "age":17}]
+[{"id":3, "name":"yyy", "age":19}]
+```
+
+Use the S3 tvf:
+
+```sql
+MySQL [(none)]> select * from s3(
+    "URI" = "http://127.0.0.1:9312/test2/data.json",
+    "s3.access_key"= "minioadmin",
+    "s3.secret_key" = "minioadmin",
+    "format" = "json",
+    "strip_outer_array" = "true",
+    "read_json_by_line" = "true",
+    "use_path_style"="true");
++------+------+------+
+| id   | name | age  |
++------+------+------+
+| 1    | ftw  | 18   |
+| 2    | xxx  | 17   |
+| 3    | yyy  | 19   |
++------+------+------+
+
+MySQL [(none)]> select * from s3(
+    "URI" = "http://127.0.0.1:9312/test2/data.json",
+    "s3.access_key"= "minioadmin",
+    "s3.secret_key" = "minioadmin",
+    "format" = "json",
+    "strip_outer_array" = "true",
+    "jsonpaths" = "[\"$.id\", \"$.age\"]",
+    "use_path_style"="true");
++------+------+
+| id   | age  |
++------+------+
+| 1    | 18   |
+| 2    | 17   |
+| 3    | 19   |
++------+------+
+```
+
+**parquet format**
+
+`parquet` format: The S3 tvf supports parsing the column names and column types of the table schema from the parquet file. Example:
+
+```sql
+MySQL [(none)]> select * from s3(
+                    "URI" = "http://127.0.0.1:9312/test2/test.snappy.parquet",
+                    "s3.access_key"= "minioadmin",
+                    "s3.secret_key" = "minioadmin",
+                    "format" = "parquet",
+                    "use_path_style"="true") limit 5;
++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+
+| p_partkey | p_name                                   | p_mfgr         | p_brand  | p_type                  | p_size | p_container | p_retailprice | p_comment           |
++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+
+| 1         | goldenrod lavender spring chocolate lace | Manufacturer#1 | Brand#13 | PROMO BURNISHED COPPER  | 7      | JUMBO PKG   | 901           | ly. slyly ironi     |
+| 2         | blush thistle blue yellow saddle         | Manufacturer#1 | Brand#13 | LARGE BRUSHED BRASS     | 1      | LG CASE     | 902           | lar accounts amo    |
+| 3         | spring green yellow purple cornsilk      | Manufacturer#4 | Brand#42 | STANDARD POLISHED BRASS | 21     | WRAP CASE   | 903           | egular deposits hag |
+| 4         | cornflower chocolate smoke green pink    | Manufacturer#3 | Brand#34 | SMALL PLATED BRASS      | 14     | MED DRUM    | 904           | p furiously r       |
+| 5         | forest brown coral puff cream            | Manufacturer#3 | Brand#32 | STANDARD POLISHED TIN   | 15     | SM PKG      | 905           | wake carefully      |
++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+
+```
+
+```sql
+MySQL [(none)]> desc function s3(
+                    "URI" = "http://127.0.0.1:9312/test2/test.snappy.parquet",
+                    "s3.access_key"= "minioadmin",
+                    "s3.secret_key" = "minioadmin",
+                    "format" = "parquet",
+                    "use_path_style"="true");
++---------------+--------------+------+-------+---------+-------+
+| Field         | Type         | Null | Key   | Default | Extra |
++---------------+--------------+------+-------+---------+-------+
+| p_partkey     | INT          | Yes  | false | NULL    | NONE  |
+| p_name        | TEXT         | Yes  | false | NULL    | NONE  |
+| p_mfgr        | TEXT         | Yes  | false | NULL    | NONE  |
+| p_brand       | TEXT         | Yes  | false | NULL    | NONE  |
+| p_type        | TEXT         | Yes  | false | NULL    | NONE  |
+| p_size        | INT          | Yes  | false | NULL    | NONE  |
+| p_container   | TEXT         | Yes  | false | NULL    | NONE  |
+| p_retailprice | DECIMAL(9,0) | Yes  | false | NULL    | NONE  |
+| p_comment     | TEXT         | Yes  | false | NULL    | NONE  |
++---------------+--------------+------+-------+---------+-------+
+```
+
+**orc format**
+
+`orc` format: Same as the `parquet` format; set the `format` parameter to `orc`.
+
+```sql
+MySQL [(none)]> select * from s3(
+                    "URI" = "http://127.0.0.1:9312/test2/test.snappy.orc",
+                    "s3.access_key"= "minioadmin",
+                    "s3.secret_key" = "minioadmin",
+                    "format" = "orc",
+                    "use_path_style"="true") limit 5;
++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+
+| p_partkey | p_name                                   | p_mfgr         | p_brand  | p_type                  | p_size | p_container | p_retailprice | p_comment           |
++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+
+| 1         | goldenrod lavender spring chocolate lace | Manufacturer#1 | Brand#13 | PROMO BURNISHED COPPER  | 7      | JUMBO PKG   | 901           | ly. slyly ironi     |
+| 2         | blush thistle blue yellow saddle         | Manufacturer#1 | Brand#13 | LARGE BRUSHED BRASS     | 1      | LG CASE     | 902           | lar accounts amo    |
+| 3         | spring green yellow purple cornsilk      | Manufacturer#4 | Brand#42 | STANDARD POLISHED BRASS | 21     | WRAP CASE   | 903           | egular deposits hag |
+| 4         | cornflower chocolate smoke green pink    | Manufacturer#3 | Brand#34 | SMALL PLATED BRASS      | 14     | MED DRUM    | 904           | p furiously r       |
+| 5         | forest brown coral puff cream            | Manufacturer#3 | Brand#32 | STANDARD POLISHED TIN   | 15     | SM PKG      | 905           | wake carefully      |
++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+
+```
+
+**avro format**
+
+`avro` format: The S3 tvf supports parsing the column names and column types of the table schema from the avro file. Example:
+
+```sql
+select * from s3(
+         "uri" = "http://127.0.0.1:9312/test2/person.avro",
+         "ACCESS_KEY" = "ak",
+         "SECRET_KEY" = "sk",
+         "FORMAT" = "avro");
++--------+--------------+-------------+-----------------+
+| name   | boolean_type | double_type | long_type       |
++--------+--------------+-------------+-----------------+
+| Alyssa | 1            | 10.0012     | 100000000221133 |
+| Ben    | 0            | 5555.999    | 4009990000      |
+| lisi   | 0            | 5992225.999 | 9099933330      |
++--------+--------------+-------------+-----------------+
+```
+
+**uri contains wildcards**
+
+The uri can use wildcards to read multiple files. Note: if wildcards are used, the format of each file must be consistent (in particular, csv/csv_with_names/csv_with_names_and_types count as different formats). The S3 tvf uses the first file to parse out the table schema. For example:
+
+The following two csv files:
+
+```
+// file1.csv
+1,aaa,18
+2,qqq,20
+3,qwe,19
+
+// file2.csv
+5,cyx,19
+6,ftw,21
+```
+
+You can use wildcards on the uri to query:
+
+```sql
+MySQL [(none)]> select * from s3(
+                    "URI" = "http://127.0.0.1:9312/test2/file*.csv",
+                    "s3.access_key"= "minioadmin",
+                    "s3.secret_key" = "minioadmin",
+                    "format" = "csv",
+                    "use_path_style"="true");
++------+------+------+
+| c1   | c2   | c3   |
++------+------+------+
+| 1    | aaa  | 18   |
+| 2    | qqq  | 20   |
+| 3    | qwe  | 19   |
+| 5    | cyx  | 19   |
+| 6    | ftw  | 21   |
++------+------+------+
+```
+
+**Using `S3` tvf with `insert into` and `cast`**
+
+```sql
+// Create a Doris internal table
+CREATE TABLE IF NOT EXISTS ${testTable}
+    (
+        id int,
+        name varchar(50),
+        age int
+    )
+    COMMENT "my first table"
+    DISTRIBUTED BY HASH(id) BUCKETS 32
+    PROPERTIES("replication_num" = "1");
+
+// Insert data using the S3 tvf
+insert into ${testTable} (id,name,age)
+select cast (id as INT) as id, name, cast (age as INT) as age
+from s3(
+    "uri" = "${uri}",
+    "s3.access_key"= "${ak}",
+    "s3.secret_key" = "${sk}",
+    "format" = "${format}",
+    "strip_outer_array" = "true",
+    "read_json_by_line" = "true",
+    "use_path_style" = "true");
+```
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/tasks.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/tasks.md
new file mode 100644
index 0000000000000..ebd279effe65d
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/tasks.md
@@ -0,0 +1,174 @@
+---
+{
+    "title": "TASKS",
+    "language": "en"
+}
+---
+
+
+
+## `tasks`
+
+### Name
+
+:::tip
+tasks
+- since 2.1
+:::
+
+### description
+
+Table function that generates a temporary table of tasks, allowing you to view the information of tasks generated by jobs in the current Doris cluster.
+
+This function is used in the `FROM` clause.
+
+This function is supported since 2.1.0.
+
+#### syntax
+
+`tasks("type"="insert");`
+
+**parameter description**
+
+| parameter | description | type   | required |
+|:----------|:------------|:-------|:---------|
+| type      | job type    | string | yes      |
+
+Supported **type** values:
+- insert: insert into type job
+- mv: materialized view type job
+
+##### Insert tasks
+
+The table schema of `tasks("type"="insert");` tvf:
+
+```
+mysql> desc function tasks("type"="insert");
++---------------+------+------+-------+---------+-------+
+| Field         | Type | Null | Key   | Default | Extra |
++---------------+------+------+-------+---------+-------+
+| TaskId        | TEXT | No   | false | NULL    | NONE  |
+| JobId         | TEXT | No   | false | NULL    | NONE  |
+| JobName       | TEXT | No   | false | NULL    | NONE  |
+| Label         | TEXT | No   | false | NULL    | NONE  |
+| Status        | TEXT | No   | false | NULL    | NONE  |
+| ErrorMsg      | TEXT | No   | false | NULL    | NONE  |
+| CreateTime    | TEXT | No   | false | NULL    | NONE  |
+| FinishTime    | TEXT | No   | false | NULL    | NONE  |
+| TrackingUrl   | TEXT | No   | false | NULL    | NONE  |
+| LoadStatistic | TEXT | No   | false | NULL    | NONE  |
+| User          | TEXT | No   | false | NULL    | NONE  |
++---------------+------+------+-------+---------+-------+
+11 rows in set (0.01 sec)
+```
+- TaskId: task id
+- JobId: job id
+- JobName: job name
+- Label: label
+- Status: task status
+- ErrorMsg: task failure information
+- CreateTime: task creation time
+- FinishTime: task completion time
+- TrackingUrl: tracking URL
+- LoadStatistic: load statistics
+- User: user
+
+##### MV Tasks
+
+```sql
+mysql> desc function tasks("type"="mv");
++-----------------------+------+------+-------+---------+-------+
+| Field                 | Type | Null | Key   | Default | Extra |
++-----------------------+------+------+-------+---------+-------+
+| TaskId                | TEXT | No   | false | NULL    | NONE  |
+| JobId                 | TEXT | No   | false | NULL    | NONE  |
+| JobName               | TEXT | No   | false | NULL    | NONE  |
+| MvId                  | TEXT | No   | false | NULL    | NONE  |
+| MvName                | TEXT | No   | false | NULL    | NONE  |
+| MvDatabaseId          | TEXT | No   | false | NULL    | NONE  |
+| MvDatabaseName        | TEXT | No   | false | NULL    | NONE  |
+| Status                | TEXT | No   | false | NULL    | NONE  |
+| ErrorMsg              | TEXT | No   | false | NULL    | NONE  |
+| CreateTime            | TEXT | No   | false | NULL    | NONE  |
+| StartTime             | TEXT | No   | false | NULL    | NONE  |
+| FinishTime            | TEXT | No   | false | NULL    | NONE  |
+| DurationMs            | TEXT | No   | false | NULL    | NONE  |
+| TaskContext           | TEXT | No   | false | NULL    | NONE  |
+| RefreshMode           | TEXT | No   | false | NULL    | NONE  |
+| NeedRefreshPartitions | TEXT | No   | false | NULL    | NONE  |
+| CompletedPartitions   | TEXT | No   | false | NULL    | NONE  |
+| Progress              | TEXT | No   | false | NULL    | NONE  |
++-----------------------+------+------+-------+---------+-------+
+18 rows in set (0.00 sec)
+```
+
+* TaskId: task id
+* JobId: job id
+* JobName: job name
+* MvId: materialized view ID
+* MvName: materialized view name
+* MvDatabaseId: ID of the database to which the materialized view belongs
+* MvDatabaseName: name of the database to which the materialized view belongs
+* Status: task status
+* ErrorMsg: task failure information
+* CreateTime: task creation time
+* StartTime: task start time
+* FinishTime: task end time
+* DurationMs: task runtime
+* TaskContext: task running parameters
+* RefreshMode: refresh mode
+* NeedRefreshPartitions: the partitions that need to be refreshed by this task
+* CompletedPartitions: the partitions that have been refreshed by this task
+* Progress: task running progress
+
+### example
+
+#### Insert Tasks
+
+```
+mysql> select * from tasks("type"="insert") limit 1 \G
+*************************** 1. row ***************************
+       TaskId: 667704038678903
+        JobId: 10069
+        Label: 10069_667704038678903
+       Status: FINISHED
+      EtlInfo: \N
+     TaskInfo: cluster:N/A; timeout(s):14400; max_filter_ratio:0.0; priority:NORMAL
+     ErrorMsg: \N
+ CreateTimeMs: 2023-12-08 16:46:57
+ FinishTimeMs: 2023-12-08 16:46:57
+  TrackingUrl: 
+LoadStatistic: {"Unfinished backends":{},"ScannedRows":0,"TaskNumber":0,"LoadBytes":0,"All backends":{},"FileNumber":0,"FileSize":0}
+         User: root
+1 row in set (0.05 sec)
+```
+
+#### MV Tasks
+
+1. View tasks of all materialized views
+
+```sql
+mysql> select * from tasks("type"="mv");
+```
+
+2. View all tasks with the job name `inner_mtmv_75043`
+
+```sql
+mysql> select * from tasks("type"="mv") where JobName="inner_mtmv_75043";
+```
+
+### keywords
+
+    tasks, job, insert, mv, materialized view
diff --git a/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/workload-group.md b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/workload-group.md
new file mode 100644
index 0000000000000..6953bf3de1a51
--- /dev/null
+++ b/versioned_docs/version-2.1/sql-manual/sql-functions/table-valued-functions/workload-group.md
@@ -0,0 +1,73 @@
+---
+{
+    "title": "WORKLOAD_GROUPS",
+    "language": "en"
+}
+---
+
+
+
+## `workload_groups`
+
+### Name
+
+workload_groups
+
+:::caution
+Deprecated. Since 2.1.1, this table function has been moved to the `information_schema.workload_groups` table.
+:::
+
+### description
+
+Table-Value-Function, generates a temporary table named `workload_groups`. This tvf is used to view information about the workload groups for which the current user has permission.
+
+This function is used in `FROM` clauses.
+
+#### syntax
+
+`workload_groups()`
+
+The table schema of `workload_groups()` tvf:
+```
+mysql> desc function workload_groups();
++-------+-------------+------+-------+---------+-------+
+| Field | Type        | Null | Key   | Default | Extra |
++-------+-------------+------+-------+---------+-------+
+| Id    | BIGINT      | No   | false | NULL    | NONE  |
+| Name  | STRING      | No   | false | NULL    | NONE  |
+| Item  | STRING      | No   | false | NULL    | NONE  |
+| Value | STRING      | No   | false | NULL    | NONE  |
++-------+-------------+------+-------+---------+-------+
+```
+
+### example
+```
+mysql> select * from workload_groups();
++-------+--------+--------------+-------+
+| Id    | Name   | Item         | Value |
++-------+--------+--------------+-------+
+| 11001 | normal | memory_limit | 100%  |
+| 11001 | normal | cpu_share    | 10    |
++-------+--------+--------------+-------+
+```
+
+### keywords
+
+    workload_groups
diff --git a/versioned_docs/version-2.1/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-JOB.md b/versioned_docs/version-2.1/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-JOB.md
index d584384369058..441dffc9a3ae1 100644
--- a/versioned_docs/version-2.1/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-JOB.md
+++ b/versioned_docs/version-2.1/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-JOB.md
@@ -65,8 +65,8 @@ Currently, only users with the ADMIN role can perform this operation.
#### Related Documentation -[PAUSE-JOB](../Alter/PAUSE-JOB.md),[RESUME-JOB](../Alter/RESUME-JOB.md),[DROP-JOB](../Drop/DROP-JOB.md), [QUERY-JOB](../../../sql-functions/table-functions/jobs.md), -[TVF-TASKS](../../../sql-functions/table-functions/tasks.md) +[PAUSE-JOB](../Alter/PAUSE-JOB.md),[RESUME-JOB](../Alter/RESUME-JOB.md),[DROP-JOB](../Drop/DROP-JOB.md), [QUERY-JOB](../../../sql-functions/table-valued-functions/jobs.md), +[TVF-TASKS](../../../sql-functions/table-valued-functions/tasks.md) ### Grammar @@ -167,4 +167,4 @@ CREATE JOB my_job ON SCHEDULE EVERY 1 DAY STARTS '2020-01-01 00:00:00' ENDS '202 ### Keywords - CREATE, JOB, SCHEDULE \ No newline at end of file + CREATE, JOB, SCHEDULE diff --git a/versioned_docs/version-3.0/data-operate/import/load-json-format.md b/versioned_docs/version-3.0/data-operate/import/load-json-format.md index 0720d3a7fb5fc..97072b879b476 100644 --- a/versioned_docs/version-3.0/data-operate/import/load-json-format.md +++ b/versioned_docs/version-3.0/data-operate/import/load-json-format.md @@ -31,7 +31,7 @@ Doris supports importing data in JSON format. This document mainly describes the Currently, only the following import methods support data import in JSON format: -- Through [S3 table function](../../sql-manual/sql-functions/table-functions/s3.md) import statement: insert into table select * from S3(); +- Through [S3 table function](../../sql-manual/sql-functions/table-valued-functions/s3.md) import statement: insert into table select * from S3(); - Import the local JSON format file through [STREAM LOAD](../../sql-manual/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md). - Subscribe and consume JSON format in Kafka via [ROUTINE LOAD](../../sql-manual/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md) information. diff --git a/versioned_docs/version-3.0/lakehouse/datalake-analytics/iceberg.md b/versioned_docs/version-3.0/lakehouse/datalake-analytics/iceberg.md index 97712ff3395d7..4d2f2f311a1e1 100644 --- a/versioned_docs/version-3.0/lakehouse/datalake-analytics/iceberg.md +++ b/versioned_docs/version-3.0/lakehouse/datalake-analytics/iceberg.md @@ -268,4 +268,4 @@ You can use the `FOR TIME AS OF` and `FOR VERSION AS OF` statements to read hist `SELECT * FROM iceberg_tbl FOR VERSION AS OF 868895038966572;` -In addition, you can use the [iceberg_meta](../../sql-manual/sql-functions/table-functions/iceberg-meta.md) table function to query the snapshot information of the specified table. +In addition, you can use the [iceberg_meta](../../sql-manual/sql-functions/table-valued-functions/iceberg-meta.md) table function to query the snapshot information of the specified table. 
diff --git a/versioned_docs/version-3.0/lakehouse/file.md b/versioned_docs/version-3.0/lakehouse/file.md index 710750f7b0bca..5694086386af1 100644 --- a/versioned_docs/version-3.0/lakehouse/file.md +++ b/versioned_docs/version-3.0/lakehouse/file.md @@ -30,9 +30,11 @@ With the Table Value Function feature, Doris is able to query files in object st For more usage details, please see the documentation: -* [S3](https://doris.apache.org/docs/dev/sql-manual/sql-functions/table-functions/s3/): supports file analysis on object storage compatible with S3 +* [S3](../sql-manual/sql-functions/table-valued-functions/s3.md): supports file analysis on object storage compatible with S3 -* [HDFS](https://doris.apache.org/docs/dev/sql-manual/sql-functions/table-functions/hdfs/): supports file analysis on HDFS +* [HDFS](../sql-manual/sql-functions/table-valued-functions/hdfs.md): supports file analysis on HDFS + +* [LOCAL](../sql-manual/sql-functions/table-valued-functions/local.md): supports file analysis on local file system The followings illustrate how file analysis is conducted with the example of S3 Table Value Function. diff --git a/versioned_docs/version-3.0/query/view-materialized-view/async-materialized-view.md b/versioned_docs/version-3.0/query/view-materialized-view/async-materialized-view.md index f1cae80e82000..1031fb05f9a56 100644 --- a/versioned_docs/version-3.0/query/view-materialized-view/async-materialized-view.md +++ b/versioned_docs/version-3.0/query/view-materialized-view/async-materialized-view.md @@ -111,7 +111,7 @@ Specific syntax can be viewed [CREATE ASYNC MATERIALIZED VIEW](../../sql-manual/ select * from mv_infos("database"="tpch") where Name="mv1"; ``` -The unique features of materialized views can be viewed through [mv_infos()](../../sql-manual/sql-functions/table-functions/mv_infos.md) +The unique features of materialized views can be viewed through [mv_infos()](../../sql-manual/sql-functions/table-valued-functions/mv_infos.md) Properties related to table, still viewed through [SHOW TABLES](../../sql-manual/sql-statements/Show-Statements/SHOW-TABLES.md) @@ -142,7 +142,7 @@ Task is used to describe specific refresh information, such as the time used for select * from jobs("type"="mv") order by CreateTime; ``` -Specific syntax can be viewed [jobs("type"="mv")](../../sql-manual/sql-functions/table-functions/jobs.md) +Specific syntax can be viewed [jobs("type"="mv")](../../sql-manual/sql-functions/table-valued-functions/jobs.md) #### Pause materialized view job scheduled scheduling @@ -170,7 +170,7 @@ Specific syntax can be viewed [RESUME MATERIALIZED VIEW JOB](../../sql-manual/sq select * from tasks("type"="mv"); ``` -Specific syntax can be viewed [tasks("type"="mv")](../../sql-manual/sql-functions/table-functions/tasks.md) +Specific syntax can be viewed [tasks("type"="mv")](../../sql-manual/sql-functions/table-valued-functions/tasks.md) #### Cancel the task of objectifying the view @@ -536,7 +536,7 @@ The commonly used commands for `olapTable` are also applicable to materialized v The unique commands for materialized views mainly include the following: #### View materialized view metadata -[mv_infos()](../../sql-manual/sql-functions/table-functions/mv_infos) +[mv_infos()](../../sql-manual/sql-functions/table-valued-functions/mv_infos) Focus on the following fields: - State: If the state changes to SCHEMA_CHANGE, it means the schema of the base table has changed. 
In this case, the materialized view cannot be used for transparent rewriting (but direct querying of the materialized view is not affected). If the next refresh task is successful, the state will be restored to NORMAL. @@ -544,7 +544,7 @@ Focus on the following fields: - RefreshState: The status of the last refresh task of the materialized view. If it is FAIL, it means the execution failed, and further localization can be done through tasks(). - SyncWithBaseTables: Whether the materialized view is synchronized with the base table data. If not synchronized, further determination can be made by using show partitions to identify which partition is not synchronized. #### View tasks for the materialized view -[tasks("type"="mv")](../../sql-manual/sql-functions/table-functions/tasks.md) +[tasks("type"="mv")](../../sql-manual/sql-functions/table-valued-functions/tasks.md) Focus on the following fields: - Status: If it is FAILED, it means the task execution failed. You can check the reason for failure through ErrorMsg. You can also search Doris logs using LastQueryId to get more detailed error information. diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/backends.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/backends.md new file mode 100644 index 0000000000000..0f1476fb9fd2e --- /dev/null +++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/backends.md @@ -0,0 +1,112 @@ +--- +{ + "title": "BACKENDS", + "language": "en" +} +--- + + + +## `backends` + +### Name + +backends + +### description + +Table-Value-Function, generate a temporary table named `backends`. This tvf is used to view the information of BE nodes in the doris cluster. + +This function is used in `FROM` clauses. + +#### syntax + +`backends()` + +The table schema of `backends()` tvf: +``` +mysql> desc function backends(); ++-------------------------+---------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------------+---------+------+-------+---------+-------+ +| BackendId | BIGINT | No | false | NULL | NONE | +| Host | TEXT | No | false | NULL | NONE | +| HeartbeatPort | INT | No | false | NULL | NONE | +| BePort | INT | No | false | NULL | NONE | +| HttpPort | INT | No | false | NULL | NONE | +| BrpcPort | INT | No | false | NULL | NONE | +| LastStartTime | TEXT | No | false | NULL | NONE | +| LastHeartbeat | TEXT | No | false | NULL | NONE | +| Alive | BOOLEAN | No | false | NULL | NONE | +| SystemDecommissioned | BOOLEAN | No | false | NULL | NONE | +| TabletNum | BIGINT | No | false | NULL | NONE | +| DataUsedCapacity | BIGINT | No | false | NULL | NONE | +| AvailCapacity | BIGINT | No | false | NULL | NONE | +| TotalCapacity | BIGINT | No | false | NULL | NONE | +| UsedPct | DOUBLE | No | false | NULL | NONE | +| MaxDiskUsedPct | DOUBLE | No | false | NULL | NONE | +| RemoteUsedCapacity | BIGINT | No | false | NULL | NONE | +| Tag | TEXT | No | false | NULL | NONE | +| ErrMsg | TEXT | No | false | NULL | NONE | +| Version | TEXT | No | false | NULL | NONE | +| Status | TEXT | No | false | NULL | NONE | +| HeartbeatFailureCounter | INT | No | false | NULL | NONE | +| NodeRole | TEXT | No | false | NULL | NONE | ++-------------------------+---------+------+-------+---------+-------+ +23 rows in set (0.002 sec) +``` + +The information displayed by the `backends` tvf is basically consistent with the information displayed by the `show backends` statement. 
However, the types of each field in the `backends` tvf are more specific, and you can use the `backends` tvf to perform operations such as filtering and joining.
+
+The information displayed by the `backends` tvf is authenticated, which is consistent with the behavior of `show backends`; the user must have ADMIN/OPERATOR privilege.
+
+### example
+```
+mysql> select * from backends()\G
+*************************** 1. row ***************************
+              BackendId: 10002
+                   Host: 10.xx.xx.90
+          HeartbeatPort: 9053
+                 BePort: 9063
+               HttpPort: 8043
+               BrpcPort: 8069
+          LastStartTime: 2023-06-15 16:51:02
+          LastHeartbeat: 2023-06-15 17:09:58
+                  Alive: 1
+   SystemDecommissioned: 0
+              TabletNum: 21
+       DataUsedCapacity: 0
+          AvailCapacity: 5187141550081
+          TotalCapacity: 7750977622016
+                UsedPct: 33.077583202570978
+         MaxDiskUsedPct: 33.077583202583881
+     RemoteUsedCapacity: 0
+                    Tag: {"location" : "default"}
+                 ErrMsg:
+                Version: doris-0.0.0-trunk-4b18cde0c7
+                 Status: {"lastSuccessReportTabletsTime":"2023-06-15 17:09:02","lastStreamLoadTime":-1,"isQueryDisabled":false,"isLoadDisabled":false}
+HeartbeatFailureCounter: 0
+               NodeRole: mix
+1 row in set (0.038 sec)
+```
+
+### keywords
+
+    backends
\ No newline at end of file
diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/catalogs.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/catalogs.md
new file mode 100644
index 0000000000000..e748297da7ff0
--- /dev/null
+++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/catalogs.md
@@ -0,0 +1,91 @@
+---
+{
+    "title": "CATALOGS",
+    "language": "en"
+}
+---
+
+## `catalogs`
+
+### Name
+
+catalogs
+
+### description
+
+The table function generates a temporary table of catalogs, which allows you to view the information of the catalogs created in the current Doris.
+
+This function is used in the from clause.
+
+#### syntax
+
+`catalogs()`
+
+catalogs() table structure:
+```
+mysql> desc function catalogs();
++-------------+--------+------+-------+---------+-------+
+| Field       | Type   | Null | Key   | Default | Extra |
++-------------+--------+------+-------+---------+-------+
+| CatalogId   | BIGINT | No   | false | NULL    | NONE  |
+| CatalogName | TEXT   | No   | false | NULL    | NONE  |
+| CatalogType | TEXT   | No   | false | NULL    | NONE  |
+| Property    | TEXT   | No   | false | NULL    | NONE  |
+| Value       | TEXT   | No   | false | NULL    | NONE  |
++-------------+--------+------+-------+---------+-------+
+5 rows in set (0.04 sec)
+```
+
+The information presented by the `catalogs()` tvf is the result of synthesizing the `show catalogs` and `show catalog xxx` statements.
+
+The table generated by the tvf can be used for filtering, joins and other operations.
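+
+For instance, filtering the generated table might look like this minimal sketch (the `hms` type value matches the example below):
+
+```sql
+-- Keep only Hive Metastore catalogs and project the identifying columns.
+select distinct CatalogId, CatalogName from catalogs() where CatalogType = 'hms';
+```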
+
+
+### example
+
+```
+mysql> select * from catalogs();
++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+
+| CatalogId | CatalogName | CatalogType | Property                                   | Value                                                                     |
++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+
+| 16725     | hive        | hms         | dfs.client.failover.proxy.provider.HANN    | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider |
+| 16725     | hive        | hms         | dfs.ha.namenodes.HANN                      | nn1,nn2                                                                   |
+| 16725     | hive        | hms         | create_time                                | 2023-07-13 16:24:38.968                                                   |
+| 16725     | hive        | hms         | ipc.client.fallback-to-simple-auth-allowed | true                                                                      |
+| 16725     | hive        | hms         | dfs.namenode.rpc-address.HANN.nn1          | nn1_host:rpc_port                                                         |
+| 16725     | hive        | hms         | hive.metastore.uris                        | thrift://127.0.0.1:7004                                                   |
+| 16725     | hive        | hms         | dfs.namenode.rpc-address.HANN.nn2          | nn2_host:rpc_port                                                         |
+| 16725     | hive        | hms         | type                                       | hms                                                                       |
+| 16725     | hive        | hms         | dfs.nameservices                           | HANN                                                                      |
+| 0         | internal    | internal    | NULL                                       | NULL                                                                      |
+| 16726     | es          | es          | create_time                                | 2023-07-13 16:24:44.922                                                   |
+| 16726     | es          | es          | type                                       | es                                                                        |
+| 16726     | es          | es          | hosts                                      | http://127.0.0.1:9200                                                     |
++-----------+-------------+-------------+--------------------------------------------+---------------------------------------------------------------------------+
+13 rows in set (0.01 sec)
+```
+
+### keywords
+
+    catalogs
diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/frontends.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/frontends.md
new file mode 100644
index 0000000000000..e66ec1200a790
--- /dev/null
+++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/frontends.md
@@ -0,0 +1,102 @@
+---
+{
+    "title": "FRONTENDS",
+    "language": "en"
+}
+---
+
+## `frontends`
+
+### Name
+
+frontends
+
+### description
+
+Table-Value-Function, generates a temporary table named `frontends`. This tvf is used to view the information of FE nodes in the doris cluster.
+
+This function is used in `FROM` clauses.
+
+#### syntax
+
+`frontends()`
+
+The table schema of `frontends()` tvf:
+```
+mysql> desc function frontends();
++--------------------+------+------+-------+---------+-------+
+| Field              | Type | Null | Key   | Default | Extra |
++--------------------+------+------+-------+---------+-------+
+| Name               | TEXT | No   | false | NULL    | NONE  |
+| Host               | TEXT | No   | false | NULL    | NONE  |
+| EditLogPort        | TEXT | No   | false | NULL    | NONE  |
+| HttpPort           | TEXT | No   | false | NULL    | NONE  |
+| QueryPort          | TEXT | No   | false | NULL    | NONE  |
+| RpcPort            | TEXT | No   | false | NULL    | NONE  |
+| ArrowFlightSqlPort | TEXT | No   | false | NULL    | NONE  |
+| Role               | TEXT | No   | false | NULL    | NONE  |
+| IsMaster           | TEXT | No   | false | NULL    | NONE  |
+| ClusterId          | TEXT | No   | false | NULL    | NONE  |
+| Join               | TEXT | No   | false | NULL    | NONE  |
+| Alive              | TEXT | No   | false | NULL    | NONE  |
+| ReplayedJournalId  | TEXT | No   | false | NULL    | NONE  |
+| LastHeartbeat      | TEXT | No   | false | NULL    | NONE  |
+| IsHelper           | TEXT | No   | false | NULL    | NONE  |
+| ErrMsg             | TEXT | No   | false | NULL    | NONE  |
+| Version            | TEXT | No   | false | NULL    | NONE  |
+| CurrentConnected   | TEXT | No   | false | NULL    | NONE  |
++--------------------+------+------+-------+---------+-------+
+18 rows in set (0.022 sec)
+```
+
+The information displayed by the `frontends` tvf is basically consistent with the information displayed by the `show frontends` statement. However, the types of each field in the `frontends` tvf are more specific, and you can use the `frontends` tvf to perform operations such as filtering and joining.
+
+The information displayed by the `frontends` tvf is authenticated, which is consistent with the behavior of `show frontends`; the user must have ADMIN/OPERATOR privilege.
+
+### example
+```
+mysql> select * from frontends()\G
+*************************** 1. row ***************************
+              Name: fe_5fa8bf19_fd6b_45cb_89c5_25a5ebc45582
+              Host: 10.xx.xx.14
+       EditLogPort: 9013
+          HttpPort: 8034
+         QueryPort: 9033
+           RpcPort: 9023
+ArrowFlightSqlPort: 9040
+              Role: FOLLOWER
+          IsMaster: true
+         ClusterId: 1258341841
+              Join: true
+             Alive: true
+ReplayedJournalId: 186
+     LastHeartbeat: 2023-06-15 16:53:12
+          IsHelper: true
+            ErrMsg:
+           Version: doris-0.0.0-trunk-4b18cde0c7
+  CurrentConnected: Yes
+1 row in set (0.060 sec)
+```
+
+### keywords
+
+    frontends
\ No newline at end of file
diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/frontends_disks.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/frontends_disks.md
new file mode 100644
index 0000000000000..0532fc477ebac
--- /dev/null
+++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/frontends_disks.md
@@ -0,0 +1,87 @@
+---
+{
+    "title": "FRONTENDS_DISKS",
+    "language": "en"
+}
+---
+
+## `frontends_disks`
+
+### Name
+
+frontends_disks
+
+### description
+
+Table-Value-Function, generates a temporary table named `frontends_disks`. This tvf is used to view the information of FE nodes' disks in the doris cluster.
+
+This function is used in `FROM` clauses.
+
+#### syntax
+
+`frontends_disks()`
+
+The table schema of `frontends_disks()` tvf:
+```
+mysql> desc function frontends_disks();
++-------------+------+------+-------+---------+-------+
+| Field       | Type | Null | Key   | Default | Extra |
++-------------+------+------+-------+---------+-------+
+| Name        | TEXT | No   | false | NULL    | NONE  |
+| Host        | TEXT | No   | false | NULL    | NONE  |
+| DirType     | TEXT | No   | false | NULL    | NONE  |
+| Dir         | TEXT | No   | false | NULL    | NONE  |
+| Filesystem  | TEXT | No   | false | NULL    | NONE  |
+| Capacity    | TEXT | No   | false | NULL    | NONE  |
+| Used        | TEXT | No   | false | NULL    | NONE  |
+| Available   | TEXT | No   | false | NULL    | NONE  |
+| UseRate     | TEXT | No   | false | NULL    | NONE  |
+| MountOn     | TEXT | No   | false | NULL    | NONE  |
++-------------+------+------+-------+---------+-------+
+10 rows in set (0.14 sec)
+```
+
+The information displayed by the `frontends_disks` tvf is basically consistent with the information displayed by the `show frontends disks` statement. However, the types of each field in the `frontends_disks` tvf are more specific, and you can use the `frontends_disks` tvf to perform operations such as filtering and joining.
+
+The information displayed by the `frontends_disks` tvf is authenticated, which is consistent with the behavior of `show frontends disks`; the user must have ADMIN/OPERATOR privilege.
+
+### example
+```
+mysql> select * from frontends_disks()\G
+*************************** 1. row ***************************
+      Name: fe_fe1d5bd9_d1e5_4ccc_9b03_ca79b95c9941
+      Host: 172.XX.XX.1
+   DirType: log
+       Dir: /data/doris/fe-github/log
+Filesystem: /dev/sdc5
+  Capacity: 366G
+      Used: 119G
+ Available: 228G
+   UseRate: 35%
+   MountOn: /data
+......
+12 rows in set (0.03 sec)
+```
+
+### keywords
+
+    frontends_disks
diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/hdfs.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/hdfs.md
new file mode 100644
index 0000000000000..7748a302ab48b
--- /dev/null
+++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/hdfs.md
@@ -0,0 +1,162 @@
+---
+{
+    "title": "HDFS",
+    "language": "en"
+}
+---
+
+## HDFS
+
+### Name
+
+hdfs
+
+### Description
+
+HDFS table-valued-function (tvf), allows users to read and access file contents on HDFS, just like accessing a relational table. Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` file formats.
+
+#### syntax
+
+```sql
+hdfs(
+  "uri" = "..",
+  "fs.defaultFS" = "...",
+  "hadoop.username" = "...",
+  "format" = "csv",
+  "keyn" = "valuen"
+  ...
+  );
+```
+
+**parameter description**
+
+Related parameters for accessing hdfs:
+
+- `uri`: (required) hdfs uri. If the uri path does not exist or the files are empty files, hdfs tvf will return an empty result set.
+- `fs.defaultFS`: (required)
+- `hadoop.username`: (required) Can be any string, but cannot be empty.
+- `hadoop.security.authentication`: (optional)
+- `hadoop.kerberos.principal`: (optional)
+- `hadoop.kerberos.keytab`: (optional)
+- `dfs.client.read.shortcircuit`: (optional)
+- `dfs.domain.socket.path`: (optional)
+
+Related parameters for accessing HDFS in HA mode:
+
+- `dfs.nameservices`: (optional)
+- `dfs.ha.namenodes.your-nameservices`: (optional)
+- `dfs.namenode.rpc-address.your-nameservices.your-namenode`: (optional)
+- `dfs.client.failover.proxy.provider.your-nameservices`: (optional)
+
+File format parameters:
+
+- `format`: (required) Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc/avro`
+- `column_separator`: (optional) default `\t`.
+- `line_delimiter`: (optional) default `\n`.
+- `compress_type`: (optional) Currently supports `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`. Default value is `UNKNOWN`; it will automatically infer the type based on the suffix of `uri`.
+
+    The following 6 parameters are used for loading in json format. For specific usage methods, please refer to: [Json Load](../../../data-operate/import/import-way/load-json-format.md)
+
+- `read_json_by_line`: (optional) default `"true"`
+- `strip_outer_array`: (optional) default `"false"`
+- `json_root`: (optional) default `""`
+- `json_paths`: (optional) default `""`
+- `num_as_string`: (optional) default `false`
+- `fuzzy_parse`: (optional) default `false`
+
+    The following 2 parameters are used for loading in csv format:
+
+- `trim_double_quotes`: Boolean type (optional), the default value is `false`. True means that the outermost double quotes of each field in the csv file are trimmed.
+- `skip_lines`: Integer type (optional), the default value is 0. It will skip some lines in the head of the csv file. It will be disabled when the format is `csv_with_names` or `csv_with_names_and_types`.
+
+Other kinds of parameters:
+
+- `path_partition_keys`: (optional) Specifies the column names carried in the file path. For example, if the file path is /path/to/city=beijing/date="2023-07-09", you should fill in `path_partition_keys="city,date"`. It will automatically read the corresponding column names and values from the path during the load process.
+- `resource`: (optional) Specify the resource name. The hdfs tvf can use an existing HDFS resource to directly access HDFS. You can refer to the method for creating an HDFS resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4.
+
+:::tip Tip
+To directly query a TVF or create a VIEW based on that TVF, you need to have usage permission for that resource. To query a VIEW created based on a TVF, you only need select permission for that VIEW.
+:::
+
+### Examples
+
+Read and access csv format files on hdfs storage.
+
+```sql
+MySQL [(none)]> select * from hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student.csv",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "csv");
++------+---------+------+
+| c1   | c2      | c3   |
++------+---------+------+
+| 1    | alice   | 18   |
+| 2    | bob     | 20   |
+| 3    | jack    | 24   |
+| 4    | jackson | 19   |
+| 5    | liming  | 18   |
++------+---------+------+
+```
+
+Read and access csv format files on hdfs storage in HA mode.
+
+```sql
+MySQL [(none)]> select * from hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student.csv",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "csv",
+            "dfs.nameservices" = "my_hdfs",
+            "dfs.ha.namenodes.my_hdfs" = "nn1,nn2",
+            "dfs.namenode.rpc-address.my_hdfs.nn1" = "namenode01:8020",
+            "dfs.namenode.rpc-address.my_hdfs.nn2" = "namenode02:8020",
+            "dfs.client.failover.proxy.provider.my_hdfs" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
++------+---------+------+
+| c1   | c2      | c3   |
++------+---------+------+
+| 1    | alice   | 18   |
+| 2    | bob     | 20   |
+| 3    | jack    | 24   |
+| 4    | jackson | 19   |
+| 5    | liming  | 18   |
++------+---------+------+
+```
+
+Can be used with `desc function`:
+
+```sql
+MySQL [(none)]> desc function hdfs(
+            "uri" = "hdfs://127.0.0.1:8424/user/doris/csv_format_test/student_with_names.csv",
+            "fs.defaultFS" = "hdfs://127.0.0.1:8424",
+            "hadoop.username" = "doris",
+            "format" = "csv_with_names");
+```
+
+### Keywords
+
+    hdfs, table-valued-function, tvf
+
+### Best Practice
+
+    For more detailed usage of the HDFS tvf, please refer to the [S3](./s3.md) tvf. The only difference between them is the way of accessing the storage system.
diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md
new file mode 100644
index 0000000000000..e6788a858fc90
--- /dev/null
+++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/iceberg-meta.md
@@ -0,0 +1,99 @@
+---
+{
+"title": "ICEBERG_META",
+"language": "en"
+}
+---
+
+## iceberg_meta
+
+### Name
+
+iceberg_meta
+
+### description
+
+iceberg_meta table-valued-function (tvf), used for reading iceberg metadata: operation history, table snapshots, file metadata, etc.
+
+#### syntax
+
+```sql
+iceberg_meta(
+    "table" = "ctl.db.tbl",
+    "query_type" = "snapshots"
+    ...
+  );
+```
+
+**parameter description**
+
+Each parameter in the iceberg_meta tvf is a pair of `"key"="value"`.
+
+Related parameters:
+- `table`: (required) The iceberg table name, in the format `catalog.database.table`.
+- `query_type`: (required) The type of iceberg metadata. Only `snapshots` is currently supported.
+
+### Example
+
+Read and access the iceberg tabular metadata for snapshots.
+
+```sql
+select * from iceberg_meta("table" = "ctl.db.tbl", "query_type" = "snapshots");
+```
+
+Can be used with `desc function`:
+
+```sql
+desc function iceberg_meta("table" = "ctl.db.tbl", "query_type" = "snapshots");
+```
+
+### Keywords
+
+    iceberg_meta, table-valued-function, tvf
+
+### Best Practice
+
+Inspect the iceberg table snapshots:
+
+```sql
+select * from iceberg_meta("table" = "iceberg_ctl.test_db.test_tbl", "query_type" = "snapshots");
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+| committed_at           | snapshot_id    | parent_id     | operation | manifest_list     | summary                      |
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+| 2022-09-20 11:14:29    | 64123452344    | -1            | append    | hdfs:/path/to/m1  | {"flink.job-id":"xxm1", ...} |
+| 2022-09-21 10:36:35    | 98865735822    | 64123452344   | overwrite | hdfs:/path/to/m2  | {"flink.job-id":"xxm2", ...} |
+| 2022-09-21 21:44:11    | 51232845315    | 98865735822   | overwrite | hdfs:/path/to/m3  | {"flink.job-id":"xxm3", ...} |
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+```
+
+Filtered by snapshot_id:
+
+```sql
+select * from iceberg_meta("table" = "iceberg_ctl.test_db.test_tbl", "query_type" = "snapshots")
+where snapshot_id = 98865735822;
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+| committed_at           | snapshot_id    | parent_id     | operation | manifest_list     | summary                      |
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+| 2022-09-21 10:36:35    | 98865735822    | 64123452344   | overwrite | hdfs:/path/to/m2  | {"flink.job-id":"xxm2", ...} |
++------------------------+----------------+---------------+-----------+-------------------+------------------------------+
+```
diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/jobs.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/jobs.md
new file mode 100644
index 0000000000000..3bc7276e08e3e
--- /dev/null
+++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/jobs.md
@@ -0,0 +1,150 @@
+---
+{
+    "title": "JOBS",
+    "language": "en"
+}
+---
+
+## `jobs`
+
+### Name
+
+jobs
+
+### description
+
+Table function that generates a temporary task table, for viewing the job information of a given job type.
+
+This function is used in the from clause.
+
+This function is supported since 2.1.0.
+
+#### syntax
+
+`jobs("type"="")`
+
+**parameter description**
+
+| parameter | description | type   | required |
+|:----------|:------------|:-------|:---------|
+| type      | job type    | string | yes      |
+
+Supported **type** values:
+- insert: insert into type job
+- mv: materialized view job
+
+##### insert job
+
+jobs("type"="insert") table structure:
+```
+mysql> desc function jobs("type"="insert");
++-------------------+------+------+-------+---------+-------+
+| Field             | Type | Null | Key   | Default | Extra |
++-------------------+------+------+-------+---------+-------+
+| Id                | TEXT | No   | false | NULL    | NONE  |
+| Name              | TEXT | No   | false | NULL    | NONE  |
+| Definer           | TEXT | No   | false | NULL    | NONE  |
+| ExecuteType       | TEXT | No   | false | NULL    | NONE  |
+| RecurringStrategy | TEXT | No   | false | NULL    | NONE  |
+| Status            | TEXT | No   | false | NULL    | NONE  |
+| ExecuteSql        | TEXT | No   | false | NULL    | NONE  |
+| CreateTime        | TEXT | No   | false | NULL    | NONE  |
+| SucceedTaskCount  | TEXT | No   | false | NULL    | NONE  |
+| FailedTaskCount   | TEXT | No   | false | NULL    | NONE  |
+| CanceledTaskCount | TEXT | No   | false | NULL    | NONE  |
+| Comment           | TEXT | No   | false | NULL    | NONE  |
++-------------------+------+------+-------+---------+-------+
+12 rows in set (0.01 sec)
+```
+* Id: job ID.
+* Name: job name.
+* Definer: job definer.
+* ExecuteType: execution type
+* RecurringStrategy: recurring strategy
+* Status: job status
+* ExecuteSql: execution SQL
+* CreateTime: job creation time
+* SucceedTaskCount: number of successful tasks
+* FailedTaskCount: number of failed tasks
+* CanceledTaskCount: number of canceled tasks
+* Comment: job comment
+
+##### materialized view job
+
+jobs("type"="mv") table structure:
+```sql
+mysql> desc function jobs("type"="mv");
++-------------------+------+------+-------+---------+-------+
+| Field             | Type | Null | Key   | Default | Extra |
++-------------------+------+------+-------+---------+-------+
+| Id                | TEXT | No   | false | NULL    | NONE  |
+| Name              | TEXT | No   | false | NULL    | NONE  |
+| MvId              | TEXT | No   | false | NULL    | NONE  |
+| MvName            | TEXT | No   | false | NULL    | NONE  |
+| MvDatabaseId      | TEXT | No   | false | NULL    | NONE  |
+| MvDatabaseName    | TEXT | No   | false | NULL    | NONE  |
+| ExecuteType       | TEXT | No   | false | NULL    | NONE  |
+| RecurringStrategy | TEXT | No   | false | NULL    | NONE  |
+| Status            | TEXT | No   | false | NULL    | NONE  |
+| CreateTime        | TEXT | No   | false | NULL    | NONE  |
++-------------------+------+------+-------+---------+-------+
+10 rows in set (0.00 sec)
+```
+
+* Id: job ID.
+* Name: job name.
+* MvId: materialized view ID
+* MvName: materialized view name
+* MvDatabaseId: DB ID of the materialized view
+* MvDatabaseName: name of the database to which the materialized view belongs
+* ExecuteType: execution type
+* RecurringStrategy: recurring strategy
+* Status: job status
+* CreateTime: job creation time
+
+### example
+
+1. View jobs for all materialized views
+
+```sql
+mysql> select * from jobs("type"="mv");
+```
+
+2. View the job with name `inner_mtmv_75043`
+
+```sql
+mysql> select * from jobs("type"="mv") where Name="inner_mtmv_75043";
+```
+
+3. View all insert jobs
+
+```sql
+mysql> select * from jobs("type"="insert");
+```
+4.
View the job with name `one_insert_job`
+
+```sql
+mysql> select * from jobs("type"="insert") where Name='one_insert_job';
+```
+
+### keywords
+
+    jobs, job, insert, mv, materialized view, schedule
diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/local.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/local.md
new file mode 100644
index 0000000000000..4f39a8dae76ae
--- /dev/null
+++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/local.md
@@ -0,0 +1,183 @@
+---
+{
+    "title": "LOCAL",
+    "language": "en"
+}
+---
+
+## local
+
+### Name
+
+local
+
+### Description
+
+Local table-valued-function (tvf), allows users to read and access local file contents on BE nodes, just like accessing a relational table. Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` file formats.
+
+It needs `ADMIN` privilege to use.
+
+#### syntax
+
+```sql
+local(
+  "file_path" = "path/to/file.txt",
+  "backend_id" = "be_id",
+  "format" = "csv",
+  "keyn" = "valuen"
+  ...
+  );
+```
+
+**parameter description**
+
+- Related parameters for accessing local files on a BE node:
+
+    - `file_path`:
+
+      (required) The path of the file to be read, which is a relative path to the `user_files_secure_path` directory, where the `user_files_secure_path` parameter [can be configured on be](../../../admin-manual/config/be-config.md).
+
+      The path cannot contain `..`. Glob syntax is supported to match multiple files, such as `log/*.log`.
+
+- Related to the execution method:
+
+    In versions prior to 2.1.1, Doris only supported specifying a BE node to read local data files on that node.
+
+    - `backend_id`:
+
+      The be id where the file is located. `backend_id` can be obtained through the `show backends` command.
+
+    Starting from version 2.1.2, Doris adds a new parameter `shared_storage`.
+
+    - `shared_storage`
+
+      Default is false. If true, the specified file exists on shared storage (such as NAS). Shared storage must be compatible with the POSIX file interface and mounted on all BE nodes at the same time.
+
+      When `shared_storage` is true, you do not need to set `backend_id`, and Doris may use all BE nodes for data access. If `backend_id` is set, the query still executes only on the specified BE node.
+
+- File format parameters:
+
+    - `format`: (required) Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`
+    - `column_separator`: (optional) default `,`.
+    - `line_delimiter`: (optional) default `\n`.
+    - `compress_type`: (optional) Currently supports `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`. Default value is `UNKNOWN`; it will automatically infer the type based on the suffix of `uri`.
+
+- The following parameters are used for loading in json format. For specific usage methods, please refer to: [Json Load](../../../data-operate/import/import-way/load-json-format.md)
+
+    - `read_json_by_line`: (optional) default `"true"`
+    - `strip_outer_array`: (optional) default `"false"`
+    - `json_root`: (optional) default `""`
+    - `json_paths`: (optional) default `""`
+    - `num_as_string`: (optional) default `false`
+    - `fuzzy_parse`: (optional) default `false`
+
+- The following parameters are used for loading in csv format:
+
+    - `trim_double_quotes`: Boolean type (optional), the default value is `false`. True means that the outermost double quotes of each field in the csv file are trimmed.
+    - `skip_lines`: Integer type (optional), the default value is 0. It will skip some lines in the head of the csv file.
It will be disabled when the format is `csv_with_names` or `csv_with_names_and_types`.
+
+### Examples
+
+Analyze the log file on the specified BE:
+
+```sql
+mysql> select * from local(
+        "file_path" = "log/be.out",
+        "backend_id" = "10006",
+        "format" = "csv")
+       where c1 like "%start_time%" limit 10;
++--------------------------------------------------------+
+| c1                                                     |
++--------------------------------------------------------+
+| start time: 2023年 08月 07日 星期一 23:20:32 CST       |
+| start time: 2023年 08月 07日 星期一 23:32:10 CST       |
+| start time: 2023年 08月 08日 星期二 00:20:50 CST       |
+| start time: 2023年 08月 08日 星期二 00:29:15 CST       |
++--------------------------------------------------------+
+```
+
+Read and access csv format files located at path `${DORIS_HOME}/student.csv`:
+
+```sql
+mysql> select * from local(
+        "file_path" = "student.csv",
+        "backend_id" = "10003",
+        "format" = "csv");
++------+---------+--------+
+| c1   | c2      | c3     |
++------+---------+--------+
+| 1    | alice   | 18     |
+| 2    | bob     | 20     |
+| 3    | jack    | 24     |
+| 4    | jackson | 19     |
+| 5    | liming  | 18     |
++------+---------+--------+
+```
+
+Query files on NAS:
+
+```sql
+mysql> select * from local(
+        "file_path" = "/mnt/doris/prefix_*.txt",
+        "format" = "csv",
+        "column_separator" =",",
+        "shared_storage" = "true");
++------+------+------+
+| c1   | c2   | c3   |
++------+------+------+
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
+| 1    | 2    | 3    |
++------+------+------+
+```
+
+Can be used with `desc function`:
+
+```sql
+mysql> desc function local(
+        "file_path" = "student.csv",
+        "backend_id" = "10003",
+        "format" = "csv");
++-------+------+------+-------+---------+-------+
+| Field | Type | Null | Key   | Default | Extra |
++-------+------+------+-------+---------+-------+
+| c1    | TEXT | Yes  | false | NULL    | NONE  |
+| c2    | TEXT | Yes  | false | NULL    | NONE  |
+| c3    | TEXT | Yes  | false | NULL    | NONE  |
++-------+------+------+-------+---------+-------+
+```
+
+### Keywords
+
+    local, table-valued-function, tvf
+
+### Best Practice
+
+- For more detailed usage of the local tvf, please refer to the [S3](./s3.md) tvf. The only difference between them is the way of accessing the storage system.
+
+- Access data on NAS through the local tvf
+
+  NAS shared storage can be mounted on multiple nodes at the same time, and each node can access files in the shared storage just like local files. Therefore, the NAS can be thought of as a local file system, accessed through the local tvf.
+
+  When setting `"shared_storage" = "true"`, Doris assumes that the specified file can be accessed from any BE node. When a set of files is specified using wildcards, Doris will distribute the file access requests to multiple BE nodes, so that multiple nodes can be used to perform distributed file scanning and improve query performance.
diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/mv_infos.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/mv_infos.md
new file mode 100644
index 0000000000000..e3938ace5e680
--- /dev/null
+++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/mv_infos.md
@@ -0,0 +1,102 @@
+---
+{
+    "title": "MV_INFOS",
+    "language": "en"
+}
+---
+
+## `mv_infos`
+
+### Name
+
+mv_infos
+
+### description
+
+Table function that generates a temporary table for asynchronous materialized views, for viewing information about the asynchronous materialized views created in a certain database.
+
+This function is used in the from clause.
+
+This function is supported since 2.1.0.
+
+#### syntax
+
+`mv_infos("database"="")`
+
+mv_infos() table structure:
+```sql
+mysql> desc function mv_infos("database"="tpch100");
++--------------------+---------+------+-------+---------+-------+
+| Field              | Type    | Null | Key   | Default | Extra |
++--------------------+---------+------+-------+---------+-------+
+| Id                 | BIGINT  | No   | false | NULL    | NONE  |
+| Name               | TEXT    | No   | false | NULL    | NONE  |
+| JobName            | TEXT    | No   | false | NULL    | NONE  |
+| State              | TEXT    | No   | false | NULL    | NONE  |
+| SchemaChangeDetail | TEXT    | No   | false | NULL    | NONE  |
+| RefreshState       | TEXT    | No   | false | NULL    | NONE  |
+| RefreshInfo        | TEXT    | No   | false | NULL    | NONE  |
+| QuerySql           | TEXT    | No   | false | NULL    | NONE  |
+| EnvInfo            | TEXT    | No   | false | NULL    | NONE  |
+| MvProperties       | TEXT    | No   | false | NULL    | NONE  |
+| MvPartitionInfo    | TEXT    | No   | false | NULL    | NONE  |
+| SyncWithBaseTables | BOOLEAN | No   | false | NULL    | NONE  |
++--------------------+---------+------+-------+---------+-------+
+12 rows in set (0.01 sec)
+```
+
+* Id: materialized view ID
+* Name: materialized view name
+* JobName: the job name corresponding to the materialized view
+* State: materialized view state
+* SchemaChangeDetail: the reason why the materialized view state became SCHEMA_CHANGE
+* RefreshState: materialized view refresh status
+* RefreshInfo: refresh strategy information defined by the materialized view
+* QuerySql: query statement defined by the materialized view
+* EnvInfo: environment information when the materialized view was created
+* MvProperties: materialized view properties
+* MvPartitionInfo: partition information of the materialized view
+* SyncWithBaseTables: whether the data is synchronized with the base tables. To see which partition is not synchronized, please use [SHOW PARTITIONS](../../sql-statements/Show-Statements/SHOW-PARTITIONS.md)
+
+### example
+
+1. View all materialized views under db1
+
+```sql
+mysql> select * from mv_infos("database"="db1");
+```
+
+2. View the materialized view named mv1 under db1
+
+```sql
+mysql> select * from mv_infos("database"="db1") where Name = "mv1";
+```
+
+3. View the state of the materialized view named mv1 under db1
+
+```sql
+mysql> select State from mv_infos("database"="db1") where Name = "mv1";
+```
+
+### keywords
+
+    mv, infos
diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/partitions.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/partitions.md
new file mode 100644
index 0000000000000..7bda80d77e298
--- /dev/null
+++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/partitions.md
@@ -0,0 +1,130 @@
+---
+{
+    "title": "PARTITIONS",
+    "language": "en"
+}
+---
+
+## `partitions`
+
+### Name
+
+partitions
+
+### Description
+
+The table function generates a temporary partitions table, which allows you to view the partition list of a certain table.
+
+This function is used in the from clause.
+
+This function is supported since 2.1.5.
+
+#### Syntax
+
+`partitions("catalog"="","database"="","table"="")`
+
+partitions() table structure:
+```sql
+mysql> desc function partitions("catalog"="internal","database"="zd","table"="user");
++--------------------------+---------+------+-------+---------+-------+
+| Field                    | Type    | Null | Key   | Default | Extra |
++--------------------------+---------+------+-------+---------+-------+
+| PartitionId              | BIGINT  | No   | false | NULL    | NONE  |
+| PartitionName            | TEXT    | No   | false | NULL    | NONE  |
+| VisibleVersion           | BIGINT  | No   | false | NULL    | NONE  |
+| VisibleVersionTime       | TEXT    | No   | false | NULL    | NONE  |
+| State                    | TEXT    | No   | false | NULL    | NONE  |
+| PartitionKey             | TEXT    | No   | false | NULL    | NONE  |
+| Range                    | TEXT    | No   | false | NULL    | NONE  |
+| DistributionKey          | TEXT    | No   | false | NULL    | NONE  |
+| Buckets                  | INT     | No   | false | NULL    | NONE  |
+| ReplicationNum           | INT     | No   | false | NULL    | NONE  |
+| StorageMedium            | TEXT    | No   | false | NULL    | NONE  |
+| CooldownTime             | TEXT    | No   | false | NULL    | NONE  |
+| RemoteStoragePolicy      | TEXT    | No   | false | NULL    | NONE  |
+| LastConsistencyCheckTime | TEXT    | No   | false | NULL    | NONE  |
+| DataSize                 | TEXT    | No   | false | NULL    | NONE  |
+| IsInMemory               | BOOLEAN | No   | false | NULL    | NONE  |
+| ReplicaAllocation        | TEXT    | No   | false | NULL    | NONE  |
+| IsMutable                | BOOLEAN | No   | false | NULL    | NONE  |
+| SyncWithBaseTables       | BOOLEAN | No   | false | NULL    | NONE  |
+| UnsyncTables             | TEXT    | No   | false | NULL    | NONE  |
++--------------------------+---------+------+-------+---------+-------+
+20 rows in set (0.02 sec)
+```
+
+* PartitionId: partition id
+* PartitionName: partition name
+* VisibleVersion: visible version
+* VisibleVersionTime: visible version time
+* State: state
+* PartitionKey: partition key
+* Range: range
+* DistributionKey: distribution key
+* Buckets: bucket num
+* ReplicationNum: replication num
+* StorageMedium: storage medium
+* CooldownTime: cooldown time
+* RemoteStoragePolicy: remote storage policy
+* LastConsistencyCheckTime: last consistency check time
+* DataSize: data size
+* IsInMemory: is in memory
+* ReplicaAllocation: replica allocation
+* IsMutable: is mutable
+* SyncWithBaseTables: whether the data is synchronized with the base tables (for partitions of asynchronous materialized views)
+* UnsyncTables: which base tables' data is not synchronized (for partitions of asynchronous materialized views)
+
+```sql
+mysql> desc function partitions("catalog"="hive","database"="zdtest","table"="com2");
++-----------+------+------+-------+---------+-------+
+| Field     | Type | Null | Key   | Default | Extra |
++-----------+------+------+-------+---------+-------+
+| Partition | TEXT | No   | false | NULL    | NONE  |
++-----------+------+------+-------+---------+-------+
+1 row in set (0.11 sec)
+```
+
+* Partition: partition name
+
+### Example
+
+1. View the partition list of table1 under db1 in the internal catalog
+
+```sql
+mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1");
+```
+
+2. View the partition information of the partition named partition1 under table1
+
+```sql
+mysql> select * from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1";
+```
+
+3.
View the partition ID of the partition named partition1 under table1
+
+```sql
+mysql> select PartitionId from partitions("catalog"="internal","database"="db1","table"="table1") where PartitionName = "partition1";
+```
+
+### Keywords
+
+    partitions
diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/query.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/query.md
new file mode 100644
index 0000000000000..f3f0adfd5f2a3
--- /dev/null
+++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/query.md
@@ -0,0 +1,110 @@
+---
+{
+    "title": "QUERY",
+    "language": "en"
+}
+---
+
+## query
+
+### Name
+
+query
+
+### description
+
+The query table-valued-function (tvf) can be used to pass a query statement directly through to a catalog for execution.
+
+:::info note
+Supported since Doris 2.1.3; currently only pass-through queries to the jdbc catalog are supported.
+You need to create the corresponding catalog in Doris first.
+:::
+
+#### syntax
+
+```sql
+query(
+  "catalog" = "catalog_name",
+  "query" = "select * from db_name.table_name where condition"
+  );
+```
+
+**Parameter Description**
+
+Each parameter in the query tvf is a `"key"="value"` pair.
+Related parameters:
+- `catalog`: (required) catalog name, which needs to be filled in according to the name of the catalog.
+- `query`: (required) The query statement to be executed.
+
+### Example
+
+Use the query function to query tables in the jdbc data source
+
+```sql
+select * from query("catalog" = "jdbc", "query" = "select * from db_name.table_name where condition");
+```
+
+Can be used with `desc function`
+
+```sql
+desc function query("catalog" = "jdbc", "query" = "select * from db_name.table_name where condition");
+```
+
+### Keywords
+
+    query, table-valued-function, tvf
+
+### Best Practice
+
+Pass-through query for tables in the jdbc catalog data source
+
+```sql
+select * from query("catalog" = "jdbc", "query" = "select * from test.student");
++------+---------+
+| id   | name    |
++------+---------+
+| 1    | alice   |
+| 2    | bob     |
+| 3    | jack    |
++------+---------+
+select * from query("catalog" = "jdbc", "query" = "select * from test.score");
++------+---------+
+| id   | score   |
++------+---------+
+| 1    | 100     |
+| 2    | 90      |
+| 3    | 80      |
++------+---------+
+```
+
+Pass-through join query for tables in the jdbc catalog data source
+
+```sql
+select * from query("catalog" = "jdbc", "query" = "select a.id, a.name, b.score from test.student a join test.score b on a.id = b.id");
++------+---------+---------+
+| id   | name    | score   |
++------+---------+---------+
+| 1    | alice   | 100     |
+| 2    | bob     | 90      |
+| 3    | jack    | 80      |
++------+---------+---------+
+```
diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/s3.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/s3.md
new file mode 100644
index 0000000000000..57a15bc13e55d
--- /dev/null
+++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/s3.md
@@ -0,0 +1,568 @@
+---
+{
+    "title": "S3",
+    "language": "en"
+}
+---
+
+## S3
+
+### Name
+
+S3
+
+### description
+
+S3 table-valued-function (tvf), allows users to read and access file contents on S3-compatible object storage, just like accessing a relational table. Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` file formats.
+ +#### syntax + +```sql +s3( + "uri" = "..", + "s3.access_key" = "...", + "s3.secret_key" = "...", + "s3.region" = "...", + "format" = "csv", + "keyn" = "valuen", + ... + ); +``` + +**parameter description** + +Each parameter in S3 tvf is a pair of `"key"="value"`. + +Related parameters for accessing S3: + +- `uri`: (required) The S3 tvf will decide whether to use the path style access method according to the `use_path_style` parameter, and the default access method is the virtual-hosted style method. +- `s3.access_key`: (required) +- `s3.secret_key`: (required) +- `s3.region`: (optional). Mandatory if the Minio has set another region. Otherwise, `us-east-1` is used by default. +- `s3.session_token`: (optional) +- `use_path_style`: (optional) default `false` . The S3 SDK uses the virtual-hosted style by default. However, some object storage systems may not be enabled or support virtual-hosted style access. At this time, we can add the `use_path_style` parameter to force the use of path style access method. +- `force_parsing_by_standard_uri`: (optional) default `false` . We can add `force_parsing_by_standard_uri` parameter to force parsing unstandard uri as standard uri. + +> Note: +> For AWS S3, standard uri styles should be: +> +> 1. AWS Client Style(Hadoop S3 Style): `s3://my-bucket/path/to/file?versionId=abc123&partNumber=77&partNumber=88` +> 2. Virtual Host Style: `https://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88` +> 3. Path Style: `https://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88` +> +> In addition to supporting the common uri styles of the above three standards, it also supports some other uri styles (maybe not common, but there may be): +> +> 1. Virtual Host AWS Client (Hadoop S3) Mixed Style: +> `s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88` +> 2. Path AWS Client (Hadoop S3) Mixed Style: +> `s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88` +> +> For detailed use cases, you can refer to Best Practice at the bottom. + +file format parameter: + +- `format`: (required) Currently support `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc` +- `column_separator`: (optional) default `\t`. +- `line_delimiter`: (optional) default `\n`. +- `compress_type`: (optional) Currently support `UNKNOWN/PLAIN/GZ/LZO/BZ2/LZ4FRAME/DEFLATE/SNAPPYBLOCK`. Default value is `UNKNOWN`, it will automatically infer the type based on the suffix of `uri`. + +The following 6 parameters are used for loading in json format. For specific usage methods, please refer to: [Json Load](../../../data-operate/import/import-way/load-json-format.md) + +- `read_json_by_line`: (optional) default `"true"` +- `strip_outer_array`: (optional) default `"false"` +- `json_root`: (optional) default `""` +- `jsonpaths`: (optional) default `""` +- `num_as_string`: (optional) default `"false"` +- `fuzzy_parse`: (optional) default `"false"` + +The following 2 parameters are used for loading in csv format + +- `trim_double_quotes`: Boolean type (optional), the default value is `false`. True means that the outermost double quotes of each field in the csv file are trimmed. +- `skip_lines`: Integer type (optional), the default value is 0. It will skip some lines in the head of csv file. It will be disabled when the format is `csv_with_names` or `csv_with_names_and_types`. 
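+
+As a minimal sketch of these two csv options (the uri and file name are hypothetical; the endpoint and credentials follow the examples below), a quoted csv file with a two-line preamble could be read as:
+
+```sql
+select * from s3(
+    "uri" = "http://127.0.0.1:9312/test2/quoted_with_preamble.csv",  -- hypothetical file
+    "s3.access_key" = "minioadmin",
+    "s3.secret_key" = "minioadmin",
+    "format" = "csv",
+    "trim_double_quotes" = "true",  -- strip the outermost double quotes of each field
+    "skip_lines" = "2",             -- skip the first 2 lines of the file
+    "use_path_style" = "true");
+```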
+ +other parameter: + +- `path_partition_keys`: (optional) Specifies the column names carried in the file path. For example, if the file path is /path/to/city=beijing/date="2023-07-09", you should fill in `path_partition_keys="city,date"`. It will automatically read the corresponding column names and values from the path during load process. +- `resource`:(optional)Specify the resource name. S3 tvf can use the existing S3 resource to directly access S3. You can refer to the method for creating an S3 resource: [CREATE-RESOURCE](../../sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md). This property is supported starting from version 2.1.4 . + +:::tip Tip +To directly query a TVF or create a VIEW based on that TVF, you need to have usage permission for that resource. To query a VIEW created based on TVF, you only need select permission for that VIEW. +::: + +### Example + +Read and access csv format files on S3-compatible object storage. + +```sql +select * from s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "csv", + "use_path_style" = "true") order by c1; +``` + +Can be used with `desc function` + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", + "s3.access_key"= "minioadmin", + "s3.secret_key" = "minioadmin", + "format" = "csv", + "use_path_style" = "true"); +``` + +### Keywords + + s3, table-valued-function, tvf + +### Best Practice + +Since the S3 table-valued-function does not know the table schema in advance, it will read the file first to parse out the table schema. + +**Usage of different uri schemas** +Example of http:// 、https:// + +```sql +// Note how to write your bucket of URI and set the 'use_path_style' parameter, as well as http://. +// Because of "use_path_style"="true", s3 will be accessed in 'path style'. +select * from s3( + "URI" = "https://endpoint/bucket/file/student.csv", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="true"); + +// Note how to write your bucket of URI and set the 'use_path_style' parameter, as well as http://. +// Because of "use_path_style"="false", s3 will be accessed in 'virtual-hosted style'. +select * from s3( + "URI" = "https://bucket.endpoint/file/student.csv", + "s3.access_key"= "ak", + "s3.secret_key" = "sk", + "format" = "csv", + "use_path_style"="false"); + +// The OSS on Alibaba Cloud and The COS on Tencent Cloud will use 'virtual-hosted style' to access s3. +// OSS +select * from s3( + "URI" = "http://example-bucket.oss-cn-beijing.aliyuncs.com/your-folder/file.parquet", + "s3.access_key" = "ak", + "s3.secret_key" = "sk", + "region" = "oss-cn-beijing", + "format" = "parquet", + "use_path_style" = "false"); +// COS +select * from s3( + "URI" = "https://example-bucket.cos.ap-hongkong.myqcloud.com/your-folder/file.parquet", + "s3.access_key" = "ak", + "s3.secret_key" = "sk", + "region" = "ap-hongkong", + "format" = "parquet", + "use_path_style" = "false"); + +// The BOS on Baidu Cloud will use 'virtual-hosted style' compatible with the S3 protocol to access s3. 
+// BOS
+select * from s3(
+    "uri" = "https://example-bucket.s3.bj.bcebos.com/your-folder/file.parquet",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "s3.region" = "bj",
+    "format" = "parquet",
+    "use_path_style" = "false");
+
+// MinIO
+select * from s3(
+    "uri" = "s3://bucket/file.csv",
+    "s3.endpoint" = "",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "s3.region" = "us-east-1",
+    "format" = "csv"
+);
+```
+
+Example of s3://:
+
+```sql
+// Note how to write your bucket of URI, no need to set 'use_path_style'.
+// s3 will be accessed in 'virtual-hosted style'.
+select * from s3(
+    "URI" = "s3://bucket/file/student.csv",
+    "s3.endpoint"= "endpoint",
+    "s3.region" = "region",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv");
+```
+
+Example of other uri styles:
+
+```sql
+// Virtual Host AWS Client (Hadoop S3) Mixed Style. Used by setting `use_path_style = false` and `force_parsing_by_standard_uri = true`.
+select * from s3(
+    "URI" = "s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv",
+    "use_path_style"="false",
+    "force_parsing_by_standard_uri"="true");
+
+// Path AWS Client (Hadoop S3) Mixed Style. Used by setting `use_path_style = true` and `force_parsing_by_standard_uri = true`.
+select * from s3(
+    "URI" = "s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88",
+    "s3.access_key"= "ak",
+    "s3.secret_key" = "sk",
+    "format" = "csv",
+    "use_path_style"="true",
+    "force_parsing_by_standard_uri"="true");
+```
+
+**csv format**
+
+`csv` format: Read the file on S3 and process it as a csv file, reading the first line in the file to parse out the table schema.
The number of columns in the first line of the file `n` will be used as the number of columns in the table schema, and the column names of the table schema will be automatically named `c1, c2, ..., cn`, and the column type is set to `String` , for example: + +The file content of student1.csv: + +``` +1,ftw,12 +2,zs,18 +3,ww,20 +``` + +use S3 tvf + +```sql +MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv", +-> "use_path_style" = "true") order by c1; ++------+------+------+ +| c1 | c2 | c3 | ++------+------+------+ +| 1 | ftw | 12 | +| 2 | zs | 18 | +| 3 | ww | 20 | ++------+------+------+ +``` + +use `desc function S3()` to view the table schema + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv", +-> "use_path_style" = "true"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| c1 | TEXT | Yes | false | NULL | NONE | +| c2 | TEXT | Yes | false | NULL | NONE | +| c3 | TEXT | Yes | false | NULL | NONE | ++-------+------+------+-------+---------+-------+ +``` + +**csv_with_names format** +`csv_with_names` format: The first line of the file is used as the number and name of the columns of the table schema, and the column type is set to `String`, for example: + +The file content of student_with_names.csv: + +``` +id,name,age +1,ftw,12 +2,zs,18 +3,ww,20 +``` + +use S3 tvf + +```sql +MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student_with_names.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names", +-> "use_path_style" = "true") order by id; ++------+------+------+ +| id | name | age | ++------+------+------+ +| 1 | ftw | 12 | +| 2 | zs | 18 | +| 3 | ww | 20 | ++------+------+------+ +``` + +```sql +MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student_with_names.csv", +-> "s3.access_key"= "minioadmin", +-> "s3.secret_key" = "minioadmin", +-> "format" = "csv_with_names", +-> "use_path_style" = "true"); ++-------+------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------+------+------+-------+---------+-------+ +| id | TEXT | Yes | false | NULL | NONE | +| name | TEXT | Yes | false | NULL | NONE | +| age | TEXT | Yes | false | NULL | NONE | ++-------+------+------+-------+---------+-------+ +``` + +**csv_with_names_and_types format** + +`csv_with_names_and_types` format: Currently, it does not support parsing the column type from a csv file. When using this format, S3 tvf will parse the first line of the file as the number and name of the columns of the table schema, and set the column type to String. Meanwhile, the second line of the file is ignored. 
+
+The file content of student_with_names_and_types.csv:
+
+```
+id,name,age
+INT,STRING,INT
+1,ftw,12
+2,zs,18
+3,ww,20
+```
+
+Query with the S3 TVF:
+
+```sql
+MySQL [(none)]> select * from s3("uri" = "http://127.0.0.1:9312/test2/student_with_names_and_types.csv",
+    -> "s3.access_key"= "minioadmin",
+    -> "s3.secret_key" = "minioadmin",
+    -> "format" = "csv_with_names_and_types",
+    -> "use_path_style" = "true") order by id;
++------+------+------+
+| id   | name | age  |
++------+------+------+
+| 1    | ftw  | 12   |
+| 2    | zs   | 18   |
+| 3    | ww   | 20   |
++------+------+------+
+```
+
+```sql
+MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student_with_names_and_types.csv",
+    -> "s3.access_key"= "minioadmin",
+    -> "s3.secret_key" = "minioadmin",
+    -> "format" = "csv_with_names_and_types",
+    -> "use_path_style" = "true");
++-------+------+------+-------+---------+-------+
+| Field | Type | Null | Key   | Default | Extra |
++-------+------+------+-------+---------+-------+
+| id    | TEXT | Yes  | false | NULL    | NONE  |
+| name  | TEXT | Yes  | false | NULL    | NONE  |
+| age   | TEXT | Yes  | false | NULL    | NONE  |
++-------+------+------+-------+---------+-------+
+```
+
+**json format**
+
+`json` format: the JSON format involves many optional parameters; refer to [Json Load](../../../data-operate/import/import-way/load-json-format.md) for the meaning of each. When the S3 TVF queries a JSON file, it locates a JSON object according to the `json_root` and `jsonpaths` parameters, uses the keys of that object as the column names of the table schema, and sets every column type to `String`. For example:
+
+The file content of data.json:
+
+```
+[{"id":1, "name":"ftw", "age":18}]
+[{"id":2, "name":"xxx", "age":17}]
+[{"id":3, "name":"yyy", "age":19}]
+```
+
+Query with the S3 TVF:
+
+```sql
+MySQL [(none)]> select * from s3(
+    "URI" = "http://127.0.0.1:9312/test2/data.json",
+    "s3.access_key"= "minioadmin",
+    "s3.secret_key" = "minioadmin",
+    "format" = "json",
+    "strip_outer_array" = "true",
+    "read_json_by_line" = "true",
+    "use_path_style"="true");
++------+------+------+
+| id   | name | age  |
++------+------+------+
+| 1    | ftw  | 18   |
+| 2    | xxx  | 17   |
+| 3    | yyy  | 19   |
++------+------+------+
+
+MySQL [(none)]> select * from s3(
+    "URI" = "http://127.0.0.1:9312/test2/data.json",
+    "s3.access_key"= "minioadmin",
+    "s3.secret_key" = "minioadmin",
+    "format" = "json",
+    "strip_outer_array" = "true",
+    "jsonpaths" = "[\"$.id\", \"$.age\"]",
+    "use_path_style"="true");
++------+------+
+| id   | age  |
++------+------+
+| 1    | 18   |
+| 2    | 17   |
+| 3    | 19   |
++------+------+
+```
+
+**parquet format**
+
+`parquet` format: the S3 TVF supports parsing the column names and column types of the table schema from the Parquet file. Example:
+
+```sql
+MySQL [(none)]> select * from s3(
+    "URI" = "http://127.0.0.1:9312/test2/test.snappy.parquet",
+    "s3.access_key"= "minioadmin",
+    "s3.secret_key" = "minioadmin",
+    "format" = "parquet",
+    "use_path_style"="true") limit 5;
++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+
+| p_partkey | p_name                                   | p_mfgr         | p_brand  | p_type                  | p_size | p_container | p_retailprice | p_comment           |
++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+
+|         1 | goldenrod lavender spring chocolate lace | Manufacturer#1 | Brand#13 | PROMO BURNISHED COPPER  |      7 | JUMBO PKG   |           901 | ly. slyly ironi     |
+|         2 | blush thistle blue yellow saddle         | Manufacturer#1 | Brand#13 | LARGE BRUSHED BRASS     |      1 | LG CASE     |           902 | lar accounts amo    |
+|         3 | spring green yellow purple cornsilk      | Manufacturer#4 | Brand#42 | STANDARD POLISHED BRASS |     21 | WRAP CASE   |           903 | egular deposits hag |
+|         4 | cornflower chocolate smoke green pink    | Manufacturer#3 | Brand#34 | SMALL PLATED BRASS      |     14 | MED DRUM    |           904 | p furiously r       |
+|         5 | forest brown coral puff cream            | Manufacturer#3 | Brand#32 | STANDARD POLISHED TIN   |     15 | SM PKG      |           905 | wake carefully      |
++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+
+```
+
+```sql
+MySQL [(none)]> desc function s3(
+    "URI" = "http://127.0.0.1:9312/test2/test.snappy.parquet",
+    "s3.access_key"= "minioadmin",
+    "s3.secret_key" = "minioadmin",
+    "format" = "parquet",
+    "use_path_style"="true");
++---------------+--------------+------+-------+---------+-------+
+| Field         | Type         | Null | Key   | Default | Extra |
++---------------+--------------+------+-------+---------+-------+
+| p_partkey     | INT          | Yes  | false | NULL    | NONE  |
+| p_name        | TEXT         | Yes  | false | NULL    | NONE  |
+| p_mfgr        | TEXT         | Yes  | false | NULL    | NONE  |
+| p_brand       | TEXT         | Yes  | false | NULL    | NONE  |
+| p_type        | TEXT         | Yes  | false | NULL    | NONE  |
+| p_size        | INT          | Yes  | false | NULL    | NONE  |
+| p_container   | TEXT         | Yes  | false | NULL    | NONE  |
+| p_retailprice | DECIMAL(9,0) | Yes  | false | NULL    | NONE  |
+| p_comment     | TEXT         | Yes  | false | NULL    | NONE  |
++---------------+--------------+------+-------+---------+-------+
+```
+
+**orc format**
+
+`orc` format: same as the `parquet` format; simply set the `format` parameter to `orc`.
+
+```sql
+MySQL [(none)]> select * from s3(
+    "URI" = "http://127.0.0.1:9312/test2/test.snappy.orc",
+    "s3.access_key"= "minioadmin",
+    "s3.secret_key" = "minioadmin",
+    "format" = "orc",
+    "use_path_style"="true") limit 5;
++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+
+| p_partkey | p_name                                   | p_mfgr         | p_brand  | p_type                  | p_size | p_container | p_retailprice | p_comment           |
++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+
+|         1 | goldenrod lavender spring chocolate lace | Manufacturer#1 | Brand#13 | PROMO BURNISHED COPPER  |      7 | JUMBO PKG   |           901 | ly. slyly ironi     |
+|         2 | blush thistle blue yellow saddle         | Manufacturer#1 | Brand#13 | LARGE BRUSHED BRASS     |      1 | LG CASE     |           902 | lar accounts amo    |
+|         3 | spring green yellow purple cornsilk      | Manufacturer#4 | Brand#42 | STANDARD POLISHED BRASS |     21 | WRAP CASE   |           903 | egular deposits hag |
+|         4 | cornflower chocolate smoke green pink    | Manufacturer#3 | Brand#34 | SMALL PLATED BRASS      |     14 | MED DRUM    |           904 | p furiously r       |
+|         5 | forest brown coral puff cream            | Manufacturer#3 | Brand#32 | STANDARD POLISHED TIN   |     15 | SM PKG      |           905 | wake carefully      |
++-----------+------------------------------------------+----------------+----------+-------------------------+--------+-------------+---------------+---------------------+
+```
+
+**avro format**
+
+`avro` format: the S3 TVF supports parsing the column names and column types of the table schema from the Avro file. Example:
+
+```sql
+select * from s3(
+    "uri" = "http://127.0.0.1:9312/test2/person.avro",
+    "ACCESS_KEY" = "ak",
+    "SECRET_KEY" = "sk",
+    "FORMAT" = "avro");
++--------+--------------+-------------+-----------------+
+| name   | boolean_type | double_type | long_type       |
++--------+--------------+-------------+-----------------+
+| Alyssa |            1 |     10.0012 | 100000000221133 |
+| Ben    |            0 |    5555.999 |      4009990000 |
+| lisi   |            0 | 5992225.999 |      9099933330 |
++--------+--------------+-------------+-----------------+
+```
+
+**uri contains wildcards**
+
+The URI can use wildcards to read multiple files. Note: if wildcards are used, all matched files must have the same format (in particular, csv, csv_with_names, and csv_with_names_and_types count as different formats); the S3 TVF uses the first file to parse out the table schema. For example:
+
+The following two CSV files:
+
+```
+// file1.csv
+1,aaa,18
+2,qqq,20
+3,qwe,19
+
+// file2.csv
+5,cyx,19
+6,ftw,21
+```
+
+You can use wildcards in the URI to query them:
+
+```sql
+MySQL [(none)]> select * from s3(
+    "URI" = "http://127.0.0.1:9312/test2/file*.csv",
+    "s3.access_key"= "minioadmin",
+    "s3.secret_key" = "minioadmin",
+    "format" = "csv",
+    "use_path_style"="true");
++------+------+------+
+| c1   | c2   | c3   |
++------+------+------+
+| 1    | aaa  | 18   |
+| 2    | qqq  | 20   |
+| 3    | qwe  | 19   |
+| 5    | cyx  | 19   |
+| 6    | ftw  | 21   |
++------+------+------+
+```
+
+**Using the `S3` TVF with `insert into` and `cast`**
+
+```sql
+// Create a Doris internal table
+CREATE TABLE IF NOT EXISTS ${testTable}
+    (
+        id int,
+        name varchar(50),
+        age int
+    )
+    COMMENT "my first table"
+    DISTRIBUTED BY HASH(id) BUCKETS 32
+    PROPERTIES("replication_num" = "1");
+
+// Insert data using the S3 TVF
+insert into ${testTable} (id,name,age)
+select cast (id as INT) as id, name, cast (age as INT) as age
+from s3(
+    "uri" = "${uri}",
+    "s3.access_key"= "${ak}",
+    "s3.secret_key" = "${sk}",
+    "format" = "${format}",
+    "strip_outer_array" = "true",
+    "read_json_by_line" = "true",
+    "use_path_style" = "true");
+```
diff --git a/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/tasks.md b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/tasks.md
new file mode 100644
index 0000000000000..ebd279effe65d
--- /dev/null
+++ b/versioned_docs/version-3.0/sql-manual/sql-functions/table-valued-functions/tasks.md
@@ -0,0 +1,174 @@
+---
+{
+    "title": "TASKS",
+    "language": "en"
+}
+---
+
+
+
+## `tasks`
+
+### Name
+
+:::tip
+tasks
+- since 2.1
+:::
+
+### description
+
+Table function that generates a temporary table of tasks, allowing you to view the information of tasks generated by jobs in the current Doris cluster.
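+
+This function is used in the FROM clause. For example, a minimal invocation (a sketch; the rows returned depend on the jobs in your cluster):
+
+```sql
+SELECT * FROM tasks("type"="insert");
+```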
+
+This function has been supported since 2.1.0.
+
+#### syntax
+
+`tasks("type"="insert");`
+
+**parameter description**
+
+| parameter | description | type   | required |
+|:----------|:------------|:-------|:---------|
+| type      | job type    | string | yes      |
+
+Supported **type** values:
+
+- insert: insert into type job
+- mv: materialized view type job
+
+##### Insert tasks
+
+The table schema of the `tasks("type"="insert")` TVF:
+
+```
+mysql> desc function tasks("type"="insert");
++---------------+------+------+-------+---------+-------+
+| Field         | Type | Null | Key   | Default | Extra |
++---------------+------+------+-------+---------+-------+
+| TaskId        | TEXT | No   | false | NULL    | NONE  |
+| JobId         | TEXT | No   | false | NULL    | NONE  |
+| JobName       | TEXT | No   | false | NULL    | NONE  |
+| Label         | TEXT | No   | false | NULL    | NONE  |
+| Status        | TEXT | No   | false | NULL    | NONE  |
+| ErrorMsg      | TEXT | No   | false | NULL    | NONE  |
+| CreateTime    | TEXT | No   | false | NULL    | NONE  |
+| FinishTime    | TEXT | No   | false | NULL    | NONE  |
+| TrackingUrl   | TEXT | No   | false | NULL    | NONE  |
+| LoadStatistic | TEXT | No   | false | NULL    | NONE  |
+| User          | TEXT | No   | false | NULL    | NONE  |
++---------------+------+------+-------+---------+-------+
+11 rows in set (0.01 sec)
+```
+
+- TaskId: task id
+- JobId: job id
+- JobName: job name
+- Label: label
+- Status: task status
+- ErrorMsg: task failure information
+- CreateTime: task creation time
+- FinishTime: task completion time
+- TrackingUrl: tracking URL
+- LoadStatistic: load statistics
+- User: user
+
+##### MV Tasks
+
+```sql
+mysql> desc function tasks("type"="mv");
++-----------------------+------+------+-------+---------+-------+
+| Field                 | Type | Null | Key   | Default | Extra |
++-----------------------+------+------+-------+---------+-------+
+| TaskId                | TEXT | No   | false | NULL    | NONE  |
+| JobId                 | TEXT | No   | false | NULL    | NONE  |
+| JobName               | TEXT | No   | false | NULL    | NONE  |
+| MvId                  | TEXT | No   | false | NULL    | NONE  |
+| MvName                | TEXT | No   | false | NULL    | NONE  |
+| MvDatabaseId          | TEXT | No   | false | NULL    | NONE  |
+| MvDatabaseName        | TEXT | No   | false | NULL    | NONE  |
+| Status                | TEXT | No   | false | NULL    | NONE  |
+| ErrorMsg              | TEXT | No   | false | NULL    | NONE  |
+| CreateTime            | TEXT | No   | false | NULL    | NONE  |
+| StartTime             | TEXT | No   | false | NULL    | NONE  |
+| FinishTime            | TEXT | No   | false | NULL    | NONE  |
+| DurationMs            | TEXT | No   | false | NULL    | NONE  |
+| TaskContext           | TEXT | No   | false | NULL    | NONE  |
+| RefreshMode           | TEXT | No   | false | NULL    | NONE  |
+| NeedRefreshPartitions | TEXT | No   | false | NULL    | NONE  |
+| CompletedPartitions   | TEXT | No   | false | NULL    | NONE  |
+| Progress              | TEXT | No   | false | NULL    | NONE  |
++-----------------------+------+------+-------+---------+-------+
+18 rows in set (0.00 sec)
+```
+
+* TaskId: task id
+* JobId: job id
+* JobName: job name
+* MvId: materialized view id
+* MvName: materialized view name
+* MvDatabaseId: id of the database to which the materialized view belongs
+* MvDatabaseName: name of the database to which the materialized view belongs
+* Status: task status
+* ErrorMsg: task failure information
+* CreateTime: task creation time
+* StartTime: task start time
+* FinishTime: task finish time
+* DurationMs: task runtime in milliseconds
+* TaskContext: task running parameters
+* RefreshMode: refresh mode
+* NeedRefreshPartitions: the partitions that need to be refreshed by this task
+* CompletedPartitions: the partitions already refreshed by this task
+* Progress: task running progress
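+
+For instance, to review refresh runs that did not succeed, you can filter on `Status` (a sketch; the status value 'FAILED' is an assumption here, not an exhaustive list of possible values):
+
+```sql
+select * from tasks("type"="mv")
+where Status = 'FAILED'
+order by CreateTime desc;
+```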
+
+### example
+
+#### Insert Tasks
+
+```
+mysql> select * from tasks("type"="insert") limit 1 \G
+*************************** 1. row ***************************
+       TaskId: 667704038678903
+        JobId: 10069
+        Label: 10069_667704038678903
+       Status: FINISHED
+      EtlInfo: \N
+     TaskInfo: cluster:N/A; timeout(s):14400; max_filter_ratio:0.0; priority:NORMAL
+     ErrorMsg: \N
+ CreateTimeMs: 2023-12-08 16:46:57
+ FinishTimeMs: 2023-12-08 16:46:57
+  TrackingUrl:
+LoadStatistic: {"Unfinished backends":{},"ScannedRows":0,"TaskNumber":0,"LoadBytes":0,"All backends":{},"FileNumber":0,"FileSize":0}
+         User: root
+1 row in set (0.05 sec)
+
+```
+
+#### MV Tasks
+
+1. View tasks for all materialized views
+
+```sql
+mysql> select * from tasks("type"="mv");
+```
+
+2. View all tasks with jobName `inner_mtmv_75043`
+
+```sql
+mysql> select * from tasks("type"="mv") where JobName="inner_mtmv_75043";
+```
+
+
+### keywords
+
+    tasks, job, insert, mv, materialized view
diff --git a/versioned_docs/version-3.0/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-JOB.md b/versioned_docs/version-3.0/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-JOB.md
index d584384369058..441dffc9a3ae1 100644
--- a/versioned_docs/version-3.0/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-JOB.md
+++ b/versioned_docs/version-3.0/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-JOB.md
@@ -65,8 +65,8 @@ Currently, only users with the ADMIN role can perform this operation.
 
 #### Related Documentation
 
-[PAUSE-JOB](../Alter/PAUSE-JOB.md),[RESUME-JOB](../Alter/RESUME-JOB.md),[DROP-JOB](../Drop/DROP-JOB.md), [QUERY-JOB](../../../sql-functions/table-functions/jobs.md),
-[TVF-TASKS](../../../sql-functions/table-functions/tasks.md)
+[PAUSE-JOB](../Alter/PAUSE-JOB.md), [RESUME-JOB](../Alter/RESUME-JOB.md), [DROP-JOB](../Drop/DROP-JOB.md), [QUERY-JOB](../../../sql-functions/table-valued-functions/jobs.md),
+[TVF-TASKS](../../../sql-functions/table-valued-functions/tasks.md)
 
 ### Grammar
 
@@ -167,4 +167,4 @@ CREATE JOB my_job ON SCHEDULE EVERY 1 DAY STARTS '2020-01-01 00:00:00' ENDS '202
 
 ### Keywords
 
-    CREATE, JOB, SCHEDULE
\ No newline at end of file
+    CREATE, JOB, SCHEDULE
diff --git a/versioned_sidebars/version-2.0-sidebars.json b/versioned_sidebars/version-2.0-sidebars.json
index a69405953ccf7..004e8662f4a55 100644
--- a/versioned_sidebars/version-2.0-sidebars.json
+++ b/versioned_sidebars/version-2.0-sidebars.json
@@ -960,17 +960,22 @@
         "sql-manual/sql-functions/table-functions/explode-bitmap",
         "sql-manual/sql-functions/table-functions/numbers",
         "sql-manual/sql-functions/table-functions/explode-numbers",
-        "sql-manual/sql-functions/table-functions/explode-numbers-outer",
-        "sql-manual/sql-functions/table-functions/s3",
-        "sql-manual/sql-functions/table-functions/hdfs",
-        "sql-manual/sql-functions/table-functions/local",
-        "sql-manual/sql-functions/table-functions/iceberg-meta",
-        "sql-manual/sql-functions/table-functions/backends",
-        "sql-manual/sql-functions/table-functions/frontends",
-        "sql-manual/sql-functions/table-functions/workload-group",
-        "sql-manual/sql-functions/table-functions/catalogs",
-        "sql-manual/sql-functions/table-functions/frontends_disks",
-        "sql-manual/sql-functions/table-functions/queries"
+        "sql-manual/sql-functions/table-functions/explode-numbers-outer"
+      ]
+    },
+    {
+      "type": "category",
+      "label": "Table Valued Functions",
+      "items": [
+        "sql-manual/sql-functions/table-valued-functions/s3",
+        "sql-manual/sql-functions/table-valued-functions/hdfs",
+        
"sql-manual/sql-functions/table-valued-functions/local", + "sql-manual/sql-functions/table-valued-functions/iceberg-meta", + "sql-manual/sql-functions/table-valued-functions/backends", + "sql-manual/sql-functions/table-valued-functions/frontends", + "sql-manual/sql-functions/table-valued-functions/frontends_disks", + "sql-manual/sql-functions/table-valued-functions/catalogs", + "sql-manual/sql-functions/table-valued-functions/workload-group" ] }, { @@ -1505,4 +1510,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/versioned_sidebars/version-2.1-sidebars.json b/versioned_sidebars/version-2.1-sidebars.json index 0295f0f6610a4..bb8065298b163 100644 --- a/versioned_sidebars/version-2.1-sidebars.json +++ b/versioned_sidebars/version-2.1-sidebars.json @@ -1025,21 +1025,28 @@ "sql-manual/sql-functions/table-functions/explode-bitmap", "sql-manual/sql-functions/table-functions/numbers", "sql-manual/sql-functions/table-functions/explode-numbers", - "sql-manual/sql-functions/table-functions/explode-numbers-outer", - "sql-manual/sql-functions/table-functions/s3", - "sql-manual/sql-functions/table-functions/hdfs", - "sql-manual/sql-functions/table-functions/local", - "sql-manual/sql-functions/table-functions/iceberg-meta", - "sql-manual/sql-functions/table-functions/backends", - "sql-manual/sql-functions/table-functions/frontends", - "sql-manual/sql-functions/table-functions/workload-group", - "sql-manual/sql-functions/table-functions/catalogs", - "sql-manual/sql-functions/table-functions/frontends_disks", - "sql-manual/sql-functions/table-functions/active_queries", - "sql-manual/sql-functions/table-functions/jobs", - "sql-manual/sql-functions/table-functions/mv_infos", - "sql-manual/sql-functions/table-functions/tasks", - "sql-manual/sql-functions/table-functions/query" + "sql-manual/sql-functions/table-functions/explode-numbers-outer" + ] + }, + { + "type": "category", + "label": "Table Valued Functions", + "items": [ + "sql-manual/sql-functions/table-valued-functions/s3", + "sql-manual/sql-functions/table-valued-functions/hdfs", + "sql-manual/sql-functions/table-valued-functions/local", + "sql-manual/sql-functions/table-valued-functions/query", + "sql-manual/sql-functions/table-valued-functions/iceberg-meta", + "sql-manual/sql-functions/table-valued-functions/backends", + "sql-manual/sql-functions/table-valued-functions/frontends", + "sql-manual/sql-functions/table-valued-functions/frontends_disks", + "sql-manual/sql-functions/table-valued-functions/catalogs", + "sql-manual/sql-functions/table-valued-functions/workload-group", + "sql-manual/sql-functions/table-valued-functions/active_queries", + "sql-manual/sql-functions/table-valued-functions/jobs", + "sql-manual/sql-functions/table-valued-functions/mv_infos", + "sql-manual/sql-functions/table-valued-functions/partitions", + "sql-manual/sql-functions/table-valued-functions/tasks" ] }, { @@ -1616,4 +1623,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/versioned_sidebars/version-3.0-sidebars.json b/versioned_sidebars/version-3.0-sidebars.json index ad6bbfc73e391..186d0e03de1a0 100644 --- a/versioned_sidebars/version-3.0-sidebars.json +++ b/versioned_sidebars/version-3.0-sidebars.json @@ -1049,21 +1049,26 @@ "sql-manual/sql-functions/table-functions/explode-bitmap", "sql-manual/sql-functions/table-functions/numbers", "sql-manual/sql-functions/table-functions/explode-numbers", - "sql-manual/sql-functions/table-functions/explode-numbers-outer", - "sql-manual/sql-functions/table-functions/s3", - 
"sql-manual/sql-functions/table-functions/hdfs", - "sql-manual/sql-functions/table-functions/local", - "sql-manual/sql-functions/table-functions/iceberg-meta", - "sql-manual/sql-functions/table-functions/backends", - "sql-manual/sql-functions/table-functions/frontends", - "sql-manual/sql-functions/table-functions/workload-group", - "sql-manual/sql-functions/table-functions/catalogs", - "sql-manual/sql-functions/table-functions/frontends_disks", - "sql-manual/sql-functions/table-functions/active_queries", - "sql-manual/sql-functions/table-functions/jobs", - "sql-manual/sql-functions/table-functions/mv_infos", - "sql-manual/sql-functions/table-functions/tasks", - "sql-manual/sql-functions/table-functions/query" + "sql-manual/sql-functions/table-functions/explode-numbers-outer" + ] + }, + { + "type": "category", + "label": "Table Valued Functions", + "items": [ + "sql-manual/sql-functions/table-valued-functions/s3", + "sql-manual/sql-functions/table-valued-functions/hdfs", + "sql-manual/sql-functions/table-valued-functions/local", + "sql-manual/sql-functions/table-valued-functions/query", + "sql-manual/sql-functions/table-valued-functions/iceberg-meta", + "sql-manual/sql-functions/table-valued-functions/backends", + "sql-manual/sql-functions/table-valued-functions/frontends", + "sql-manual/sql-functions/table-valued-functions/frontends_disks", + "sql-manual/sql-functions/table-valued-functions/catalogs", + "sql-manual/sql-functions/table-valued-functions/jobs", + "sql-manual/sql-functions/table-valued-functions/mv_infos", + "sql-manual/sql-functions/table-valued-functions/partitions", + "sql-manual/sql-functions/table-valued-functions/tasks" ] }, { @@ -1646,4 +1651,4 @@ ] } ] -} \ No newline at end of file +}