Skip to content

Commit

Permalink
Use UInt32 as the `id` column type in the IMDB table schemas
Browse files: browse the repository at this point in the history
  • Loading branch information
doupache committed Sep 28, 2024
1 parent 5c41915 commit e3c325b
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 25 deletions.
7 changes: 3 additions & 4 deletions benchmarks/src/imdb/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ impl ConvertOpt {
pub async fn run(self) -> Result<()> {
let input_path = self.input_path.to_str().unwrap();
let output_path = self.output_path.to_str().unwrap();
let config = SessionConfig::new().with_batch_size(self.batch_size);
let ctx = SessionContext::new_with_config(config);

for table in IMDB_TABLES {
let start = Instant::now();
Expand All @@ -63,10 +65,7 @@ impl ConvertOpt {
.delimiter(b',')
.escape(b'\\')
.file_extension(".csv");

let config = SessionConfig::new().with_batch_size(self.batch_size);
let ctx = SessionContext::new_with_config(config);


let mut csv = ctx.read_csv(&input_path, options).await?;

// Select all apart from the padding column
Expand Down
42 changes: 21 additions & 21 deletions benchmarks/src/imdb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ pub const IMDB_TABLES: &[&str] = &[
pub fn get_imdb_table_schema(table: &str) -> Schema {
match table {
"aka_name" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("person_id", DataType::Int32, false),
Field::new("name", DataType::Utf8, true),
Field::new("imdb_index", DataType::Utf8, true),
Expand All @@ -69,7 +69,7 @@ pub fn get_imdb_table_schema(table: &str) -> Schema {
Field::new("md5sum", DataType::Utf8, true),
]),
"aka_title" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("movie_id", DataType::Int32, false),
Field::new("title", DataType::Utf8, true),
Field::new("imdb_index", DataType::Utf8, true),
Expand All @@ -83,7 +83,7 @@ pub fn get_imdb_table_schema(table: &str) -> Schema {
Field::new("md5sum", DataType::Utf8, true),
]),
"cast_info" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("person_id", DataType::Int32, false),
Field::new("movie_id", DataType::Int32, false),
Field::new("person_role_id", DataType::Int32, true),
Expand All @@ -92,7 +92,7 @@ pub fn get_imdb_table_schema(table: &str) -> Schema {
Field::new("role_id", DataType::Int32, false),
]),
"char_name" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("name", DataType::Utf8, false),
Field::new("imdb_index", DataType::Utf8, true),
Field::new("imdb_id", DataType::Int32, true),
Expand All @@ -101,11 +101,11 @@ pub fn get_imdb_table_schema(table: &str) -> Schema {
Field::new("md5sum", DataType::Utf8, true),
]),
"comp_cast_type" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("kind", DataType::Utf8, false),
]),
"company_name" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("name", DataType::Utf8, false),
Field::new("country_code", DataType::Utf8, true),
Field::new("imdb_id", DataType::Int32, true),
Expand All @@ -114,59 +114,59 @@ pub fn get_imdb_table_schema(table: &str) -> Schema {
Field::new("md5sum", DataType::Utf8, true),
]),
"company_type" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("kind", DataType::Utf8, true),
]),
"complete_cast" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("movie_id", DataType::Int32, true),
Field::new("subject_id", DataType::Int32, false),
Field::new("status_id", DataType::Int32, false),
]),
"info_type" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("info", DataType::Utf8, false),
]),
"keyword" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("keyword", DataType::Utf8, false),
Field::new("phonetic_code", DataType::Utf8, true),
]),
"kind_type" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("kind", DataType::Utf8, true),
]),
"link_type" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("link", DataType::Utf8, false),
]),
"movie_companies" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("movie_id", DataType::Int32, false),
Field::new("company_id", DataType::Int32, false),
Field::new("company_type_id", DataType::Int32, false),
Field::new("note", DataType::Utf8, true),
]),
"movie_info_idx" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("movie_id", DataType::Int32, false),
Field::new("info_type_id", DataType::Int32, false),
Field::new("info", DataType::Utf8, false),
Field::new("note", DataType::Utf8, true),
]),
"movie_keyword" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("movie_id", DataType::Int32, false),
Field::new("keyword_id", DataType::Int32, false),
]),
"movie_link" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("movie_id", DataType::Int32, false),
Field::new("linked_movie_id", DataType::Int32, false),
Field::new("link_type_id", DataType::Int32, false),
]),
"name" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("name", DataType::Utf8, false),
Field::new("imdb_index", DataType::Utf8, true),
Field::new("imdb_id", DataType::Int32, true),
Expand All @@ -177,11 +177,11 @@ pub fn get_imdb_table_schema(table: &str) -> Schema {
Field::new("md5sum", DataType::Utf8, true),
]),
"role_type" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("role", DataType::Utf8, false),
]),
"title" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("title", DataType::Utf8, false),
Field::new("imdb_index", DataType::Utf8, true),
Field::new("kind_id", DataType::Int32, false),
Expand All @@ -195,14 +195,14 @@ pub fn get_imdb_table_schema(table: &str) -> Schema {
Field::new("md5sum", DataType::Utf8, true),
]),
"movie_info" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("movie_id", DataType::Int32, false),
Field::new("info_type_id", DataType::Int32, false),
Field::new("info", DataType::Utf8, false),
Field::new("note", DataType::Utf8, true),
]),
"person_info" => Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("id", DataType::UInt32, false),
Field::new("person_id", DataType::Int32, false),
Field::new("info_type_id", DataType::Int32, false),
Field::new("info", DataType::Utf8, false),
Expand Down

0 comments on commit e3c325b

Please sign in to comment.