-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from dingxin-tech/tpch_version
use tpch version as default version
- Loading branch information
Showing
10 changed files
with
350 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
# Run the packaged application on a slim JDK 17 Alpine base image.
FROM eclipse-temurin:17-jdk-alpine
VOLUME /tmp
COPY target/*.jar app.jar
# Ship the pre-built TPC-H SQLite database alongside the application jar.
COPY target/classes/tpch.db /tpch-tiny.db
EXPOSE 8080
# NOTE(review): --add-opens presumably lets the SQLite/NIO code reach java.nio
# internals on JDK 17 — confirm against the runtime error it works around.
ENTRYPOINT ["java", "--add-opens=java.base/java.nio=ALL-UNNAMED", "-jar", "/app.jar"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
1. download tpch-tools | ||
from [official website](https://www.tpc.org/TPC_Documents_Current_Versions/download_programs/tools-download-request5.asp?bm_type=TPC-H&bm_vers=3.0.1&mode=CURRENT-ONLY) | ||
2. unzip the file and change dir to dbgen | ||
3. fill in `makefile.suite` and rename it to `Makefile`, then run `make` to build the executable. (Note that if you
   work on
   macOS, you need to change `#include <malloc.h>` to `#include <stdlib.h>` in `varsub.c` and `bm_utils.c`)
4. run `dbgen` like this: `./dbgen -vf -s 1`, and then you will find `.tbl` files in the current dir.
5. Process the obtained `.tbl` files into CSV format for subsequent loading into SQLite. You can use the following script.
|
||
```sql | ||
-- TPC-H schema for SQLite.
-- Types follow SQLite storage classes: monetary amounts are REAL and dates
-- are stored as TEXT (ISO-8601), matching the dbgen .tbl output.
-- TPC-H foreign-key relationships are intentionally not declared; SQLite
-- does not enforce them by default and the loader does not rely on them.

-- 25 fixed nations, each belonging to a region.
CREATE TABLE nation (
    nationkey INTEGER PRIMARY KEY,
    name      TEXT    NOT NULL,
    regionkey INTEGER NOT NULL,
    comment   TEXT
);

-- 5 fixed geographic regions.
CREATE TABLE region (
    regionkey INTEGER PRIMARY KEY,
    name      TEXT    NOT NULL,
    comment   TEXT
);

-- Catalog of parts that can be ordered.
CREATE TABLE part (
    partkey     INTEGER PRIMARY KEY,
    name        TEXT    NOT NULL,
    mfgr        TEXT    NOT NULL,
    brand       TEXT    NOT NULL,
    type        TEXT    NOT NULL,
    size        INTEGER NOT NULL,
    container   TEXT    NOT NULL,
    retailprice REAL    NOT NULL,
    comment     TEXT    NOT NULL
);

-- Suppliers, keyed into nation via nationkey.
CREATE TABLE supplier (
    suppkey   INTEGER PRIMARY KEY,
    name      TEXT    NOT NULL,
    address   TEXT    NOT NULL,
    nationkey INTEGER NOT NULL,
    phone     TEXT    NOT NULL,
    acctbal   REAL    NOT NULL,
    comment   TEXT    NOT NULL
);

-- Part/supplier association: which supplier stocks which part, at what cost.
CREATE TABLE partsupp (
    partkey    INTEGER NOT NULL,
    suppkey    INTEGER NOT NULL,
    availqty   INTEGER NOT NULL,
    supplycost REAL    NOT NULL,
    comment    TEXT    NOT NULL,
    PRIMARY KEY (partkey, suppkey)
);

-- Customers, keyed into nation via nationkey.
CREATE TABLE customer (
    custkey    INTEGER PRIMARY KEY,
    name       TEXT    NOT NULL,
    address    TEXT    NOT NULL,
    nationkey  INTEGER NOT NULL,
    phone      TEXT    NOT NULL,
    acctbal    REAL    NOT NULL,
    mktsegment TEXT    NOT NULL,
    comment    TEXT    NOT NULL
);

-- Order headers; one row per customer order.
CREATE TABLE orders (
    orderkey      INTEGER PRIMARY KEY,
    custkey       INTEGER NOT NULL,
    orderstatus   TEXT    NOT NULL,
    totalprice    REAL    NOT NULL,
    orderdate     TEXT    NOT NULL,
    orderpriority TEXT    NOT NULL,
    clerk         TEXT    NOT NULL,
    shippriority  INTEGER NOT NULL,
    comment       TEXT    NOT NULL
);

-- Order line items; the largest TPC-H table, keyed by (orderkey, linenumber).
CREATE TABLE lineitem (
    orderkey      INTEGER NOT NULL,
    partkey       INTEGER NOT NULL,
    suppkey       INTEGER NOT NULL,
    linenumber    INTEGER NOT NULL,
    quantity      REAL    NOT NULL,
    extendedprice REAL    NOT NULL,
    discount      REAL    NOT NULL,
    tax           REAL    NOT NULL,
    returnflag    TEXT    NOT NULL,
    linestatus    TEXT    NOT NULL,
    shipdate      TEXT    NOT NULL,
    commitdate    TEXT    NOT NULL,
    receiptdate   TEXT    NOT NULL,
    shipinstruct  TEXT    NOT NULL,
    shipmode      TEXT    NOT NULL,
    comment       TEXT    NOT NULL,
    PRIMARY KEY (orderkey, linenumber)
);
``` | ||
|
||
|
||
```bash | ||
#!/bin/bash
# Convert raw dbgen .tbl files in the current directory into pipe-delimited
# CSVs: prepend the column-name header for each TPC-H table and strip the
# trailing '|' that dbgen emits at the end of every record.

# Return the hard-coded header row for a TPC-H table name; empty if unknown.
get_header() {
    case "$1" in
        "customer")
            echo "custkey|name|address|nationkey|phone|acctbal|mktsegment|comment"
            ;;
        "lineitem")
            echo "orderkey|partkey|suppkey|linenumber|quantity|extendedprice|discount|tax|returnflag|linestatus|shipdate|commitdate|receiptdate|shipinstruct|shipmode|comment"
            ;;
        "nation")
            echo "nationkey|name|regionkey|comment"
            ;;
        "orders")
            echo "orderkey|custkey|orderstatus|totalprice|orderdate|orderpriority|clerk|shippriority|comment"
            ;;
        "part")
            echo "partkey|name|mfgr|brand|type|size|container|retailprice|comment"
            ;;
        "partsupp")
            echo "partkey|suppkey|availqty|supplycost|comment"
            ;;
        "region")
            echo "regionkey|name|comment"
            ;;
        "supplier")
            echo "suppkey|name|address|nationkey|phone|acctbal|comment"
            ;;
        *)
            echo ""
            ;;
    esac
}

for file in customer.tbl lineitem.tbl nation.tbl orders.tbl part.tbl partsupp.tbl region.tbl supplier.tbl; do
    table_name="${file%.*}"
    header=$(get_header "$table_name")

    if [ -n "$header" ]; then
        # Write header + cleaned rows to a temp file, then replace atomically.
        tmp_file=$(mktemp "${file}.tmp.XXXXXX")
        echo "$header" > "$tmp_file"
        sed 's/|$//' "$file" >> "$tmp_file"
        mv "$tmp_file" "$file"
    else
        echo "no header defined for $table_name, skipping $file"
    fi
done
|
||
```
6. Load the data into the tables. You can use the following script.
```sql | ||
-- sqlite3 CLI commands: import the prepared pipe-separated .tbl files into
-- the matching tables.
-- NOTE(review): in csv mode .import inserts the header line as a data row;
-- the AnalyzeDbRunner utility later deletes those header rows — confirm the
-- two steps are always run together.
.mode csv
.separator |
.import customer.tbl customer
.import lineitem.tbl lineitem
.import nation.tbl nation
.import orders.tbl orders
.import part.tbl part
.import partsupp.tbl partsupp
.import region.tbl region
.import supplier.tbl supplier
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
125 changes: 125 additions & 0 deletions
125
src/main/java/com/aliyun/odps/utils/AnalyzeDbRunner.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.aliyun.odps.utils; | ||
|
||
import com.aliyun.odps.entity.SqlLiteColumn; | ||
import com.aliyun.odps.entity.SqlLiteSchema; | ||
|
||
import java.sql.Connection; | ||
import java.sql.DatabaseMetaData; | ||
import java.sql.DriverManager; | ||
import java.sql.ResultSet; | ||
import java.sql.SQLException; | ||
import java.sql.Statement; | ||
import java.util.ArrayList; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/** | ||
* @author dingxin ([email protected]) | ||
*/ | ||
public class AnalyzeDbRunner { | ||
|
||
public static void main(String[] args) { | ||
String url = "jdbc:sqlite:tpch.db"; | ||
CommonUtils.initEmulator(); | ||
try (Connection conn = DriverManager.getConnection(url)) { | ||
if (conn != null) { | ||
Map<String, SqlLiteSchema> schemas = getDatabaseSchema(conn); | ||
schemas.forEach((tableName, schema) -> { | ||
try { | ||
deleteRowIfFirstColumnEqualsColumnName(conn, tableName); | ||
SqlRunner.executeSql("INSERT INTO schemas VALUES ('" + tableName + "', '" + schema.toJson() + | ||
"');"); | ||
} catch (SQLException e) { | ||
throw new RuntimeException(e); | ||
} | ||
}); | ||
} | ||
} catch (SQLException e) { | ||
e.printStackTrace(); | ||
} | ||
} | ||
|
||
public static Map<String, SqlLiteSchema> getDatabaseSchema(Connection conn) throws SQLException { | ||
DatabaseMetaData meta = conn.getMetaData(); | ||
ResultSet rsTables = meta.getTables(null, null, "%", new String[] {"TABLE"}); | ||
Map<String, SqlLiteSchema> schemas = new HashMap<>(); | ||
|
||
while (rsTables.next()) { | ||
String tableName = rsTables.getString("TABLE_NAME"); | ||
ResultSet rsColumns = meta.getColumns(null, null, tableName, "%"); | ||
SqlLiteSchema schema = new SqlLiteSchema(); | ||
List<SqlLiteColumn> columns = new ArrayList<>(); | ||
|
||
while (rsColumns.next()) { | ||
String columnName = rsColumns.getString("COLUMN_NAME"); | ||
String columnType = rsColumns.getString("TYPE_NAME"); | ||
if (columnType.equals("TEXT")) { | ||
columnType = "STRING"; | ||
} | ||
boolean notNull = rsColumns.getInt("NULLABLE") == DatabaseMetaData.columnNoNulls; | ||
String defaultValue = rsColumns.getString("COLUMN_DEF"); | ||
boolean primaryKey = isPrimaryKey(meta, tableName, columnName); | ||
|
||
SqlLiteColumn column = | ||
new SqlLiteColumn(columnName, columnType, notNull, defaultValue, primaryKey, false); | ||
columns.add(column); | ||
} | ||
|
||
schema.setColumns(columns); | ||
schema.setPartitionColumns(new ArrayList<>()); | ||
schemas.put(tableName, schema); | ||
rsColumns.close(); | ||
} | ||
|
||
rsTables.close(); | ||
return schemas; | ||
} | ||
|
||
private static boolean isPrimaryKey(DatabaseMetaData meta, String tableName, String columnName) | ||
throws SQLException { | ||
ResultSet rsPrimaryKeys = meta.getPrimaryKeys(null, null, tableName); | ||
while (rsPrimaryKeys.next()) { | ||
String pkColumnName = rsPrimaryKeys.getString("COLUMN_NAME"); | ||
if (columnName.equals(pkColumnName)) { | ||
rsPrimaryKeys.close(); | ||
return true; | ||
} | ||
} | ||
rsPrimaryKeys.close(); | ||
return false; | ||
} | ||
|
||
private static void deleteRowIfFirstColumnEqualsColumnName(Connection conn, String tableName) throws SQLException { | ||
Statement stmt = conn.createStatement(); | ||
ResultSet rs = stmt.executeQuery("PRAGMA table_info(" + tableName + ")"); | ||
|
||
if (rs.next()) { | ||
String firstColumnName = rs.getString("name"); | ||
String sql = "DELETE FROM " + tableName + " WHERE " + firstColumnName + " = '" + firstColumnName + "'"; | ||
stmt.executeUpdate(sql); | ||
System.out.println("Deleted rows from table " + tableName + " where " + firstColumnName + " equals " + | ||
firstColumnName); | ||
} | ||
|
||
stmt.close(); | ||
rs.close(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.