chore(query): remove codes of storage/parquet2 (databendlabs#14305)
* chore(query): remove codes of storage/parquet2

sundy-li authored Feb 29, 2024
1 parent 7a1183a commit c54890d
Showing 39 changed files with 19 additions and 3,141 deletions.
9 changes: 0 additions & 9 deletions .github/actions/test_sqllogic_stage/action.yml
@@ -45,18 +45,9 @@ runs:
         aws --endpoint-url http://127.0.0.1:9900/ s3 mb s3://testbucket
         aws --endpoint-url http://127.0.0.1:9900/ s3 cp tests/data s3://testbucket/data --recursive --no-progress
-    - name: Run sqllogic Tests with Standalone mode parquet2 lib
-      shell: bash
-      env:
-        TEST_HANDLERS: ${{ inputs.handlers }}
-        TEST_STAGE_STORAGE: ${{ inputs.storage }}
-        TEST_STAGE_PARQUET_LIB: parquet2
-      run: bash ./scripts/ci/ci-run-sqllogic-tests-without-sandbox.sh ${{ inputs.dirs }}
-
     - name: Run sqllogic Tests with Standalone mode parquet_rs lib
       shell: bash
       env:
         TEST_HANDLERS: ${{ inputs.handlers }}
         TEST_STAGE_STORAGE: ${{ inputs.storage }}
-        TEST_STAGE_PARQUET_LIB: parquet_rs
       run: bash ./scripts/ci/ci-run-sqllogic-tests-without-sandbox.sh ${{ inputs.dirs }}
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default.

4 changes: 0 additions & 4 deletions src/query/catalog/src/plan/datasource/datasource_info/data_source_info.rs
@@ -17,7 +17,6 @@ use std::sync::Arc;
 use databend_common_expression::TableSchema;
 use databend_common_meta_app::schema::TableInfo;
 
-use crate::plan::Parquet2TableInfo;
 use crate::plan::ParquetTableInfo;
 use crate::plan::ResultScanTableInfo;
 use crate::plan::StageTableInfo;
@@ -30,7 +29,6 @@ pub enum DataSourceInfo {
     StageSource(StageTableInfo),
     // stage source with parquet format used for select.
     ParquetSource(ParquetTableInfo),
-    Parquet2Source(Parquet2TableInfo),
     // Table Function Result_Scan
     ResultScanSource(ResultScanTableInfo),
 }
@@ -41,7 +39,6 @@ impl DataSourceInfo {
             DataSourceInfo::TableSource(table_info) => table_info.schema(),
             DataSourceInfo::StageSource(table_info) => table_info.schema(),
             DataSourceInfo::ParquetSource(table_info) => table_info.schema(),
-            DataSourceInfo::Parquet2Source(table_info) => table_info.schema(),
             DataSourceInfo::ResultScanSource(table_info) => table_info.schema(),
         }
     }
@@ -51,7 +48,6 @@
             DataSourceInfo::TableSource(table_info) => table_info.desc.clone(),
             DataSourceInfo::StageSource(table_info) => table_info.desc(),
             DataSourceInfo::ParquetSource(table_info) => table_info.desc(),
-            DataSourceInfo::Parquet2Source(table_info) => table_info.desc(),
             DataSourceInfo::ResultScanSource(table_info) => table_info.desc(),
         }
     }
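Removing the Parquet2Source variant forces every match on DataSourceInfo elsewhere in the tree (the privilege check and query_ctx.rs hunks below) to drop its arm as well, or the build fails as non-exhaustive. A minimal sketch of the dispatch shape that remains, using simplified stand-in types rather than the real Databend definitions:

// Sketch only: stand-ins for the Databend types.
struct StageTableInfo { desc: String }
struct ParquetTableInfo { desc: String }

enum DataSourceInfo {
    StageSource(StageTableInfo),
    ParquetSource(ParquetTableInfo),
}

impl DataSourceInfo {
    // With Parquet2Source gone, the match stays exhaustive with one arm fewer.
    fn desc(&self) -> String {
        match self {
            DataSourceInfo::StageSource(info) => info.desc.clone(),
            DataSourceInfo::ParquetSource(info) => info.desc.clone(),
        }
    }
}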
2 changes: 0 additions & 2 deletions src/query/catalog/src/plan/datasource/datasource_info/mod.rs
@@ -14,7 +14,6 @@
 
 mod data_source_info;
 mod parquet;
-mod parquet2;
 mod parquet_read_options;
 mod result_scan;
 mod stage;
@@ -23,7 +22,6 @@ pub use data_source_info::DataSourceInfo;
 pub use parquet::FullParquetMeta;
 pub use parquet::ParquetTableInfo;
 pub use parquet::ParquetTableInfo as ParquetTableInfoV2;
-pub use parquet2::Parquet2TableInfo;
 pub use parquet_read_options::ParquetReadOptions;
 pub use result_scan::ResultScanTableInfo;
 pub use stage::StageTableInfo;
52 changes: 0 additions & 52 deletions src/query/catalog/src/plan/datasource/datasource_info/parquet2.rs

This file was deleted.

29 changes: 0 additions & 29 deletions src/query/catalog/src/table.rs
@@ -526,35 +526,6 @@ pub struct NavigationDescriptor {
 
 use std::collections::HashMap;
 
-#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Default)]
-pub struct Parquet2TableColumnStatisticsProvider {
-    column_stats: HashMap<ColumnId, Option<BasicColumnStatistics>>,
-    num_rows: u64,
-}
-
-impl Parquet2TableColumnStatisticsProvider {
-    pub fn new(column_stats: HashMap<ColumnId, BasicColumnStatistics>, num_rows: u64) -> Self {
-        let column_stats = column_stats
-            .into_iter()
-            .map(|(column_id, stat)| (column_id, stat.get_useful_stat(num_rows)))
-            .collect();
-        Self {
-            column_stats,
-            num_rows,
-        }
-    }
-}
-
-impl ColumnStatisticsProvider for Parquet2TableColumnStatisticsProvider {
-    fn column_statistics(&self, column_id: ColumnId) -> Option<&BasicColumnStatistics> {
-        self.column_stats.get(&column_id).and_then(|s| s.as_ref())
-    }
-
-    fn num_rows(&self) -> Option<u64> {
-        Some(self.num_rows)
-    }
-}
-
 #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Default)]
 pub struct ParquetTableColumnStatisticsProvider {
     column_stats: HashMap<ColumnId, Option<BasicColumnStatistics>>,
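The deleted provider is structurally identical to the ParquetTableColumnStatisticsProvider that survives below: each column's BasicColumnStatistics is wrapped in an Option, and get_useful_stat discards stats that carry no pruning value for the given row count. A reduced, self-contained sketch of that pattern — the types and the get_useful_stat rule here are hypothetical stand-ins, not the Databend originals:

use std::collections::HashMap;

type ColumnId = u32;

#[derive(Clone)]
struct BasicColumnStatistics {
    null_count: u64,
}

impl BasicColumnStatistics {
    // Hypothetical filter standing in for Databend's get_useful_stat:
    // keep a stat only if it says more than "every value is null".
    fn get_useful_stat(self, num_rows: u64) -> Option<Self> {
        (self.null_count < num_rows).then_some(self)
    }
}

struct StatsProvider {
    column_stats: HashMap<ColumnId, Option<BasicColumnStatistics>>,
    num_rows: u64,
}

impl StatsProvider {
    fn new(stats: HashMap<ColumnId, BasicColumnStatistics>, num_rows: u64) -> Self {
        // Pre-filter once at construction so lookups stay cheap.
        let column_stats = stats
            .into_iter()
            .map(|(id, stat)| (id, stat.get_useful_stat(num_rows)))
            .collect();
        Self { column_stats, num_rows }
    }

    fn column_statistics(&self, id: ColumnId) -> Option<&BasicColumnStatistics> {
        self.column_stats.get(&id).and_then(|s| s.as_ref())
    }

    fn num_rows(&self) -> u64 {
        self.num_rows
    }
}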
3 changes: 0 additions & 3 deletions src/query/service/src/interpreters/access/privilege_access.rs
Expand Up @@ -576,9 +576,6 @@ impl AccessChecker for PrivilegeAccess {
DataSourceInfo::StageSource(stage_info) => {
self.validate_stage_access(&stage_info.stage_info, UserPrivilegeType::Read).await?;
}
DataSourceInfo::Parquet2Source(stage_info) => {
self.validate_stage_access(&stage_info.stage_info, UserPrivilegeType::Read).await?;
}
DataSourceInfo::ParquetSource(stage_info) => {
self.validate_stage_access(&stage_info.stage_info, UserPrivilegeType::Read).await?;
}
2 changes: 0 additions & 2 deletions src/query/service/src/sessions/query_ctx.rs
@@ -84,7 +84,6 @@ use databend_common_storage::StorageMetrics;
 use databend_common_storages_delta::DeltaTable;
 use databend_common_storages_fuse::TableContext;
 use databend_common_storages_iceberg::IcebergTable;
-use databend_common_storages_parquet::Parquet2Table;
 use databend_common_storages_parquet::ParquetRSTable;
 use databend_common_storages_result_cache::ResultScan;
 use databend_common_storages_stage::StageTable;
@@ -332,7 +331,6 @@ impl TableContext for QueryContext {
                 stage_info,
                 plan.tbl_args.clone(),
             ),
-            DataSourceInfo::Parquet2Source(table_info) => Parquet2Table::from_info(table_info),
             DataSourceInfo::ParquetSource(table_info) => ParquetRSTable::from_info(table_info),
             DataSourceInfo::ResultScanSource(table_info) => ResultScan::from_info(table_info),
         }
4 changes: 2 additions & 2 deletions src/query/service/tests/it/parquet_rs/prune_pages.rs
@@ -26,15 +26,15 @@ use parquet::arrow::arrow_reader::RowSelector;
 
 use crate::parquet_rs::data::make_test_file_page;
 use crate::parquet_rs::data::Scenario;
-use crate::parquet_rs::utils::create_parquet2_test_fixture;
+use crate::parquet_rs::utils::create_parquet_test_fixture;
 use crate::parquet_rs::utils::get_data_source_plan;
 
 async fn test(scenario: Scenario, predicate: &str, expected_selection: RowSelection) {
     let (file, arrow_schema) = make_test_file_page(scenario).await;
     let file_path = file.path().to_string_lossy();
     let sql = format!("select * from 'fs://{file_path}' where {predicate}");
 
-    let fixture = create_parquet2_test_fixture().await;
+    let fixture = create_parquet_test_fixture().await;
     let plan = get_data_source_plan(fixture.new_query_ctx().await.unwrap(), &sql)
         .await
         .unwrap();
4 changes: 2 additions & 2 deletions src/query/service/tests/it/parquet_rs/prune_row_groups.rs
@@ -23,7 +23,7 @@ use databend_common_storages_parquet::ParquetRSPruner;
 use super::data::make_test_file_rg;
 use super::data::Scenario;
 use super::utils::get_data_source_plan;
-use crate::parquet_rs::utils::create_parquet2_test_fixture;
+use crate::parquet_rs::utils::create_parquet_test_fixture;
 
 /// Enable row groups pruning and test.
 async fn test(scenario: Scenario, predicate: &str, expected_rgs: Vec<usize>) {
@@ -40,7 +40,7 @@ async fn test_impl(scenario: Scenario, predicate: &str, expected_rgs: Vec<usize>
     let file_path = file.path().to_string_lossy();
     let sql = format!("select * from 'fs://{file_path}' where {predicate}");
 
-    let fixture = create_parquet2_test_fixture().await;
+    let fixture = create_parquet_test_fixture().await;
     let plan = get_data_source_plan(fixture.new_query_ctx().await.unwrap(), &sql)
         .await
         .unwrap();
11 changes: 2 additions & 9 deletions src/query/service/tests/it/parquet_rs/utils.rs
@@ -23,17 +23,10 @@ use databend_common_sql::Planner;
 use databend_query::test_kits::ConfigBuilder;
 use databend_query::test_kits::TestFixture;
 
-pub async fn create_parquet2_test_fixture() -> TestFixture {
+pub async fn create_parquet_test_fixture() -> TestFixture {
     let mut conf = ConfigBuilder::create().config();
     conf.storage.allow_insecure = true;
-    let test_fixture = TestFixture::setup_with_config(&conf).await.unwrap();
-    test_fixture
-        .default_session()
-        .get_settings()
-        .set_use_parquet2(false)
-        .unwrap();
-
-    test_fixture
+    TestFixture::setup_with_config(&conf).await.unwrap()
 }
 
 pub async fn get_data_source_plan(ctx: Arc<dyn TableContext>, sql: &str) -> Result<DataSourcePlan> {
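For callers, the change is just the rename plus one less session mutation: the fixture no longer needs to flip set_use_parquet2 before handing out a query context. Abridged calling pattern from the prune tests earlier in this diff, with the assertions elided:

// Usage sketch; relies on the helpers defined in this test module above.
async fn run_case(sql: &str) {
    let fixture = create_parquet_test_fixture().await;
    let ctx = fixture.new_query_ctx().await.unwrap();
    let plan = get_data_source_plan(ctx, sql).await.unwrap();
    // ...assert on `plan` (row selection / row-group pruning)...
    let _ = plan;
}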
2 changes: 1 addition & 1 deletion src/query/settings/src/settings_default.rs
@@ -517,7 +517,7 @@ impl DefaultSettings {
         }),
         ("use_parquet2", DefaultSettingValue {
             value: UserSettingValue::UInt64(0),
-            desc: "Use parquet2 instead of parquet_rs when infer_schema().",
+            desc: "This setting is deprecated",
             mode: SettingMode::Both,
             range: Some(SettingRange::Numeric(0..=1)),
         }),
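Note that the use_parquet2 key stays registered with its 0..=1 range intact; only the description changes. Presumably this keeps existing SET use_parquet2 = ... statements from erroring, while the value no longer selects an engine — the branch that read it in binder/table.rs below is the companion deletion.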
29 changes: 8 additions & 21 deletions src/query/sql/src/planner/binder/table.rs
@@ -72,7 +72,6 @@ use databend_common_meta_types::MetaId;
 use databend_common_storage::DataOperator;
 use databend_common_storage::StageFileInfo;
 use databend_common_storage::StageFilesInfo;
-use databend_common_storages_parquet::Parquet2Table;
 use databend_common_storages_parquet::ParquetRSTable;
 use databend_common_storages_result_cache::ResultCacheMetaManager;
 use databend_common_storages_result_cache::ResultCacheReader;
@@ -979,7 +978,6 @@ impl Binder {
 
         let table = match stage_info.file_format_params {
             FileFormatParams::Parquet(..) => {
-                let use_parquet2 = table_ctx.get_settings().get_use_parquet2()?;
                 let mut read_options = ParquetReadOptions::default();
 
                 if !table_ctx.get_settings().get_enable_parquet_page_index()? {
@@ -997,25 +995,14 @@ impl Binder {
                     read_options = read_options.with_do_prewhere(false);
                 }
 
-                if use_parquet2 {
-                    Parquet2Table::create(
-                        table_ctx.clone(),
-                        stage_info.clone(),
-                        files_info,
-                        read_options,
-                        files_to_copy,
-                    )
-                    .await?
-                } else {
-                    ParquetRSTable::create(
-                        table_ctx.clone(),
-                        stage_info.clone(),
-                        files_info,
-                        read_options,
-                        files_to_copy,
-                    )
-                    .await?
-                }
+                ParquetRSTable::create(
+                    table_ctx.clone(),
+                    stage_info.clone(),
+                    files_info,
+                    read_options,
+                    files_to_copy,
+                )
+                .await?
             }
             FileFormatParams::NdJson(..) => {
                 let schema = Arc::new(TableSchema::new(vec![TableField::new(
1 change: 0 additions & 1 deletion src/query/storages/parquet/Cargo.toml
Expand Up @@ -21,7 +21,6 @@ databend-common-functions = { path = "../../functions" }
databend-common-meta-app = { path = "../../../meta/app" }
databend-common-metrics = { path = "../../../common/metrics" }
databend-common-pipeline-core = { path = "../../pipeline/core" }
databend-common-pipeline-sources = { path = "../../pipeline/sources" }
databend-common-storage = { path = "../../../common/storage" }
databend-storages-common-pruner = { path = "../common/pruner" }
databend-storages-common-table-meta = { path = "../common/table_meta" }
2 changes: 0 additions & 2 deletions src/query/storages/parquet/src/lib.rs
@@ -23,13 +23,11 @@
 #![feature(int_roundings)]
 #![feature(box_patterns)]
 
-mod parquet2;
 mod parquet_part;
 mod parquet_rs;
 mod read_settings;
 mod utils;
 
-pub use parquet2::Parquet2Table;
 pub use parquet_part::ParquetFilesPart;
 pub use parquet_part::ParquetPart;
 pub use parquet_rs::InMemoryRowGroup;
24 changes: 0 additions & 24 deletions src/query/storages/parquet/src/parquet2/mod.rs

This file was deleted.
