-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Only supports simple queries with projection, ie. "select a, b from foobar.csv"
- Loading branch information
1 parent
50d6578
commit bb43004
Showing
12 changed files
with
242 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,4 +17,4 @@ _build/ | |
|
||
# oasis generated files | ||
setup.data | ||
setup.log | ||
setup.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
B _build | ||
S *.ml | ||
PKG mparser | ||
PKG ounit | ||
PKG str |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Build the command line tool when `make` is run without a target.
.DEFAULT_GOAL := csv_query.native

# `test` names an action, not a file: without .PHONY a stray file called
# `test` would make `make test` a silent no-op.
.PHONY : test

# Build and run the parser test suite.
test : QueryParserTest.native
	./QueryParserTest.native

# QueryParser.mli is listed so that interface changes trigger a rebuild.
QueryParserTest.native : QueryParserTest.ml QueryParser.ml QueryParser.mli Query.ml
	ocamlbuild -use-ocamlfind 'QueryParserTest.native'

csv_query.native : csv_query.ml Query.ml QueryParser.ml QueryParser.mli QueryExecutor.ml
	ocamlbuild -use-ocamlfind 'csv_query.native'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
(** Name of the file that backs a table. *)
type file_name = string

(** Name of a column as it appears in a query. *)
type column_name = string

(*type row = 'a 'b map*)

(** A table reference, identified by the file it is read from. *)
type table =
  | Table of file_name

(** A reference to a single column. *)
type column =
  | Column of column_name

(** The columns a query selects: all of them, or an explicit list. *)
type projection =
  | All
  | Columns of column list

(** A query: a projection applied to a single table. *)
type query =
  | Select of (projection * table)
|
||
(* | ||
example query: select e.name, d.name from employees.csv as e join departments.csv as d where d.name = accounting | ||
*) | ||
|
||
(** [print_table t] renders [t] as the string ["(Table <file name>)"]. *)
let print_table (Table table_name) =
  String.concat "" [ "(Table "; table_name; ")" ]
|
||
(** [print_column c] returns the name carried by column [c].

    The previous version returned the string literal ["column_name"] for every
    column, so all columns printed identically. *)
let print_column = function
  | Column column_name -> column_name
|
||
(** [print_projection p] renders [p] as ["All"], or as the bracketed,
    comma-separated list of its printed columns. *)
let print_projection projection =
  match projection with
  | All -> "All"
  | Columns columns ->
    let rendered = columns |> List.map print_column |> String.concat ", " in
    "[" ^ rendered ^ "]"
|
||
(** [print_query q] renders [q] as ["Select (<projection>, <table>)"], using
    [print_projection] and [print_table] for the two components. *)
let print_query (Select (projection, table)) =
  Printf.sprintf "Select (%s, %s)"
    (print_projection projection)
    (print_table table)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
open Query | ||
|
||
(** A projection expressed as positions within a row rather than as column
    names: either every position, or an explicit list of 0-based indices. *)
type indexed_projection =
  | AllIndices
  | ColumnIndices of int list
|
||
(** [deoptionalize l] drops every [None] from [l] and unwraps every [Some],
    keeping the remaining values in their original order. *)
let deoptionalize l =
  let keep_some acc = function
    | None -> acc
    | Some x -> x :: acc
  in
  (* The fold builds the result reversed; restore the input order. *)
  List.rev (List.fold_left keep_some [] l)
|
||
(** [find_index list element] returns the 0-based position of the first item
    of [list] structurally equal to [element], or [-1] when there is none. *)
let find_index list element =
  let rec scan position = function
    | [] -> -1
    | head :: rest ->
      if head = element then position else scan (position + 1) rest
  in
  scan 0 list
|
||
(** [optional_find list i] returns [Some x] where [x] is the element of [list]
    at 0-based index [i], or [None] when [i] is negative or past the end.

    Only the two exceptions [List.nth] raises for a bad index are caught
    ([Invalid_argument] for a negative index, [Failure] for one that is too
    large), so unrelated exceptions are no longer silently swallowed. *)
let optional_find list i =
  try Some (List.nth list i) with
  | Failure _ | Invalid_argument _ -> None
|
||
(** [get_all_indices indices list] returns the elements of [list] found at the
    positions in [indices], in the order the indices are given. Indices for
    which [optional_find] yields [None] contribute nothing to the result. *)
let get_all_indices indices list =
  indices
  |> List.map (fun i -> optional_find list i)
  |> deoptionalize
|
||
(** [project values p] applies the indexed projection [p] to one row:
    [AllIndices] returns [values] unchanged, and [ColumnIndices indices] keeps
    only the values at those indices (via [get_all_indices]). *)
let project values indexed_projection =
  match indexed_projection with
  | AllIndices -> values
  | ColumnIndices indices -> get_all_indices indices values
|
||
(** [get_indexed_projection header_row p] turns the name-based projection [p]
    into an index-based one by looking each column name up in [header_row]
    with [find_index]. A name missing from the header maps to index [-1]. *)
let get_indexed_projection header_row projection =
  match projection with
  | All -> AllIndices
  | Columns columns ->
    let index_of (Column projection_col_name) =
      find_index header_row projection_col_name
    in
    ColumnIndices (List.map index_of columns)
|
||
(** [execute_query q] runs [q] against its table file and prints the result
    to standard output as comma-separated lines.

    The first line of the file is taken as the header: it fixes the positions
    of the projected columns and is printed first (projected). Every following
    line is split on [','], projected the same way and printed.

    Changes from the previous version:
    - the channel is closed on every exit path, not only on [End_of_file];
    - an empty file produces no output instead of an uncaught [End_of_file];
    - rows are split with [Str.split_delim], so an empty first or last field
      is kept and the remaining fields stay under the right column;
    - the separator regexp is compiled once rather than once per row.

    @raise Sys_error if the table file cannot be opened or read. *)
let execute_query = function
  | Select (projection, Table table_file) ->
    let in_channel = open_in table_file in
    let comma = Str.regexp "," in
    let split_row line = Str.split_delim comma line in
    (* [None] marks the end of the file. *)
    let read_line () =
      try Some (input_line in_channel) with End_of_file -> None
    in
    let run () =
      match read_line () with
      | None -> ()
      | Some header_line ->
        let header_row = split_row header_line in
        let indexed_projection =
          get_indexed_projection header_row projection
        in
        let print_row values =
          print_endline (String.concat "," (project values indexed_projection))
        in
        print_row header_row;
        let rec print_remaining () =
          match read_line () with
          | None -> ()
          | Some line ->
            print_row (split_row line);
            print_remaining ()
        in
        print_remaining ()
    in
    (* Close the channel before letting any failure propagate. *)
    (try run () with e -> close_in_noerr in_channel; raise e);
    close_in in_channel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
open MParser | ||
open Query | ||
|
||
(* A single whitespace character: a blank or a newline. *)
let whitespace = blank <|> newline

(* One character of an identifier. Underscores are accepted in addition to
   letters and digits, so that column names such as [office_id] parse. *)
let identifier_char = alphanum <|> char '_'

(* A table name: one or more identifier characters or dots, which covers
   file names such as [employees.csv]. *)
let tablename = many1_chars (identifier_char <|> char '.')

(* A single entry of the select list: a column name or the literal "*". *)
let column_spec = (many1_chars identifier_char) <|> (string "*")
|
||
(* [csv p] parses one or more occurrences of [p] separated by commas and
   returns the results in order.

   Any amount of whitespace (including none) may follow each comma, so both
   "a, b" and "a,b" are accepted. The previous version required exactly one
   whitespace character after every comma. *)
let csv p =
  let separator = skip_char ',' >> spaces in
  p >>= fun parsed_head ->
  many (separator >> p) >>= fun parsed_rest ->
  return (parsed_head :: parsed_rest)
|
||
(* [strings_to_projection names] maps the parsed select list to a projection:
   exactly ["*"] means [All]; any other list becomes [Columns], with one
   [Column] per name. *)
let strings_to_projection = function
  | [ "*" ] -> All
  | names -> Columns (List.map (fun n -> Column n) names)
|
||
(* Parser for "select <columns> from <table>".

   Keywords are matched literally (lowercase). Tokens may be separated by one
   or more whitespace characters; the previous version accepted exactly one,
   so a doubled space made the whole query fail. *)
let query_parser =
  skip_string "select" >>
  spaces1 >>
  csv column_spec >>= fun column_names ->
  spaces1 >>
  skip_string "from" >>
  spaces1 >>
  tablename >>= fun tn ->
  let projection = strings_to_projection column_names in
  return (Query.Select (projection, Query.Table tn))
|
||
(* [parse_query string] parses [string] as one complete query.

   After the query only optional whitespace may follow. Previously anything
   after the table name was silently ignored, so a query with an unsupported
   clause (for example "... where x") ran as if the clause were absent; it now
   yields a parse failure instead. *)
let parse_query string =
  parse_string (query_parser << spaces << eof) string ()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
open MParser | ||
open Query | ||
|
||
(** Parser that recognises a select query and produces its [Query.query]
    value. It carries no user state. *)
val query_parser : (Query.query, unit) MParser.t

(** [parse_query s] runs {!query_parser} on the string [s] and returns the
    MParser result: the parsed query on success, or the failure details. *)
val parse_query : string -> Query.query MParser.result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
open MParser | ||
open OUnit2 | ||
open Query | ||
open QueryParser | ||
|
||
exception Parse_failure of string | ||
|
||
(* [unwrap_result r] returns the value of a successful parse; a failed parse
   fails the running test with the parser's error message. *)
let unwrap_result result =
  match result with
  | Failed (message, _) -> assert_failure message
  | Success a -> a
|
||
(* [parse_test query_string expected_query] builds a test function that parses
   [query_string] and checks that the result equals [expected_query].

   The values are passed to [assert_equal] in its (expected, actual) order and
   rendered with [print_query] through [~printer], which replaces the
   hand-built message. The test context is forwarded instead of being left
   unused. *)
let parse_test query_string expected_query test_ctxt =
  let actual_query =
    unwrap_result (parse_string query_parser query_string ())
  in
  assert_equal ~ctxt:test_ctxt ~printer:print_query expected_query actual_query
|
||
(* "select *" must parse to the [All] projection over the named file. *)
let parse_simple_all_projection_query =
  let expected = Select (All, Table "helloworld.csv") in
  parse_test "select * from helloworld.csv" expected

(* An explicit column list must parse to [Columns], preserving order. *)
let parse_simple_projection_query =
  let expected =
    Select (Columns [ Column "hello"; Column "world" ], Table "helloworld.csv")
  in
  parse_test "select hello, world from helloworld.csv" expected
|
||
(* All parser test cases, grouped under a single labelled suite. *)
let suite =
  let cases =
    [
      "parsing a simple * query" >:: parse_simple_all_projection_query;
      "parsing a simple query with projection" >:: parse_simple_projection_query;
    ]
  in
  "select query suite" >::: cases

(* Entry point of the test executable. *)
let () = run_test_tt_main suite
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,45 @@ | ||
# csv-query | ||
A command line tool for performing SQL queries on CSV files | ||
# csv_query - A command line tool for performing SQL queries on CSV files | ||
|
||
Simple usage: | ||
|
||
``` | ||
make | ||
./csv_query.native 'select * from employees.csv' | ||
``` | ||
|
||
## How to build | ||
|
||
Run `make` to run the default target which builds the csv_query.native binary. | ||
|
||
### Dependencies | ||
|
||
1. mparser | ||
2. ounit (for testing) | ||
3. str | ||
|
||
I still need to figure out how to extract the dependency specification to a file and what command to use to install the dependencies automatically. I installed them manually using opam, e.g. `opam install mparser`, on my own machine. | ||
|
||
## How to test | ||
|
||
Run `make test` to compile and run the test suite. | ||
|
||
## How does it work? | ||
|
||
The query string is parsed using a simple monadic parser constructed using [MParser](https://github.com/cakeplus/mparser). The code for the parser is located in QueryParser.ml. The parser returns a `query` data structure which can be found in Query.ml. The `query` can be executed by the `execute_query` function in QueryExecutor.ml to produce output based on the given SQL query. | ||
|
||
## Supported SQL features | ||
|
||
- SELECT * | ||
- SELECT a, b | ||
- csv files as table names, eg. FROM employees.csv | ||
|
||
## Future work | ||
|
||
- Add more SQL features (WHERE, JOIN, aggregate functions) | ||
- Add an easier way to fetch the dependencies | ||
- Add property based testing which would automatically generate a Query data value, print it as a string and then parse it back and check that the parsed query matches the original randomly generated one | ||
- Improve build process (I'm terrible at writing Makefiles) | ||
|
||
## Dev environment | ||
|
||
I use VSCode with the OCaml plugin and Merlin. The code is built using `ocamlbuild` and `ocamlfind` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
true: package(ounit), package(mparser), package(str) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
open MParser | ||
open QueryParser | ||
open QueryExecutor | ||
|
||
(* Entry point: parse the query given as the first command line argument and
   execute it.

   A missing argument prints a usage line to stderr and exits with status 2,
   instead of failing with an out-of-bounds exception on [Sys.argv]. A parse
   error is written to stderr and exits with status 1, so callers can tell
   failure from success and the message does not mix into the query output. *)
let () =
  if Array.length Sys.argv < 2 then begin
    prerr_endline ("Usage: " ^ Sys.argv.(0) ^ " '<query>'");
    exit 2
  end;
  let query_string = Sys.argv.(1) in
  match parse_query query_string with
  | Success parsed_query ->
    execute_query parsed_query
  | Failed (message, _) ->
    prerr_string message;
    exit 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
name,salary,office_id | ||
alice,2000000,1 | ||
bob,1000000,1 |