Skip to content

Commit

Permalink
Add initial version of source code
Browse files Browse the repository at this point in the history
- Only supports simple queries with projection, ie. "select a, b from
foobar.csv"
  • Loading branch information
sam-gronblom-rj committed Jun 11, 2017
1 parent 50d6578 commit bb43004
Show file tree
Hide file tree
Showing 12 changed files with 242 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ _build/

# oasis generated files
setup.data
setup.log
setup.log
5 changes: 5 additions & 0 deletions .merlin
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
B _build
S *.ml
PKG mparser
PKG ounit
PKG str
10 changes: 10 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Build the command line tool when `make` is run without arguments.
.DEFAULT_GOAL := csv_query.native

# `test` names an action, not a file; declaring it phony keeps a stray file
# called "test" from suppressing the test run.
.PHONY : test

# Build the parser test executable and run it.
test : QueryParserTest.native
	./QueryParserTest.native

# The interface file is listed too, so editing it triggers a rebuild.
QueryParserTest.native : QueryParserTest.ml QueryParser.ml QueryParser.mli Query.ml
	ocamlbuild -use-ocamlfind 'QueryParserTest.native'

csv_query.native : csv_query.ml Query.ml QueryParser.ml QueryParser.mli QueryExecutor.ml
	ocamlbuild -use-ocamlfind 'csv_query.native'
40 changes: 40 additions & 0 deletions Query.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
(** Name of the file that backs a table, exactly as written in the query. *)
type file_name = string

(** Name of a column, exactly as written in the query. *)
type column_name = string

(*type row = 'a 'b map*)

(** A table to read rows from, identified by the file that stores it. *)
type table =
Table of file_name

(** A single column reference, identified by its name. *)
type column =
Column of column_name

(** The column selection of a query: either every column ([All]) or an
    explicit, ordered list of columns ([Columns]). *)
type projection =
| All
| Columns of column list

(** A parsed query. The only form is [Select], which pairs the projection
    with the table it is applied to. *)
type query =
Select of (projection * table)

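(*
example of a query that is NOT supported yet (aliases, join and where cannot be represented by the query type above): select e.name, d.name from employees.csv as e join departments.csv as d where d.name = accounting
*)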

(** [print_table table] renders [table] as a debug string of the form
    [(Table <file name>)]. *)
let print_table (Table file) = Printf.sprintf "(Table %s)" file

(** [print_column column] renders [column] as a debug string of the form
    [(Column <column name>)], mirroring the format used by [print_table].

    The name bound by the pattern is interpolated into the result; a quoted
    literal here would make every column print identically and render
    assertion messages useless. *)
let print_column = function
  | Column column_name -> "(Column " ^ column_name ^ ")"

(** [print_projection projection] renders a projection as a debug string:
    ["All"] for [All], otherwise the printed columns separated by [", "] and
    wrapped in square brackets. *)
let print_projection projection =
  match projection with
  | All -> "All"
  | Columns columns ->
    let rendered = columns |> List.map print_column |> String.concat ", " in
    Printf.sprintf "[%s]" rendered

(** [print_query query] renders a query as a debug string of the form
    [Select (<projection>, <table>)]. *)
let print_query (Select (projection, table)) =
  Printf.sprintf "Select (%s, %s)"
    (print_projection projection)
    (print_table table)
56 changes: 56 additions & 0 deletions QueryExecutor.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
open Query

(** A projection resolved against a header row: either every field
    ([AllIndices]) or the zero-based positions of the requested fields
    ([ColumnIndices]). *)
type indexed_projection =
| AllIndices
| ColumnIndices of int list

(** [deoptionalize l] drops every [None] from [l] and unwraps the remaining
    [Some] values, preserving their relative order. *)
let deoptionalize l =
  let keep_present kept = function
    | None -> kept
    | Some value -> value :: kept
  in
  (* The fold builds the result back to front, hence the final reversal. *)
  List.rev (List.fold_left keep_present [] l)

(** [find_index list element] returns the zero-based position of the first
    item of [list] that is structurally equal to [element], or [-1] when no
    item matches. *)
let find_index list element =
  let rec search position = function
    | [] -> -1
    | head :: rest ->
      if head = element then position else search (position + 1) rest
  in
  search 0 list

(** [optional_find list i] returns [Some x] where [x] is the element of
    [list] at zero-based position [i], or [None] when [i] is out of range.

    Only the two exceptions [List.nth] uses to report a bad index are
    handled: [Failure] (list too short) and [Invalid_argument] (negative
    index). Anything else, such as [Stack_overflow] or [Sys.Break],
    propagates instead of being silently turned into [None]. *)
let optional_find list i =
  try Some (List.nth list i)
  with Failure _ | Invalid_argument _ -> None

(** [get_all_indices indices list] picks the elements of [list] found at the
    positions in [indices], in the order the positions are given. Positions
    that [optional_find] cannot resolve contribute nothing to the result. *)
let get_all_indices indices list =
  indices
  |> List.map (optional_find list)
  |> deoptionalize

(** [project values projection] applies an indexed projection to one row:
    [AllIndices] returns [values] unchanged, [ColumnIndices] keeps only the
    fields at the listed positions. *)
let project values projection =
  match projection with
  | AllIndices -> values
  | ColumnIndices indices -> get_all_indices indices values

(** [get_indexed_projection header_row projection] resolves the column names
    of [projection] to their positions in [header_row]. [All] maps to
    [AllIndices]; each named column maps to whatever [find_index] returns for
    it, including its "not found" value. *)
let get_indexed_projection header_row projection =
  match projection with
  | All -> AllIndices
  | Columns columns ->
    let index_of_column (Column name) = find_index header_row name in
    ColumnIndices (List.map index_of_column columns)

(** [execute_query query] runs [query] against its CSV file and prints the
    projected header followed by every projected data row to standard
    output, one comma-separated line per row.

    - The first line of the file is treated as the header and is used to
      resolve the projection's column names to positions.
    - Rows are split with [Str.split_delim] so that empty fields at the start
      or end of a line are kept; [Str.split] would drop them and shift every
      later field to the wrong column.
    - An empty file produces no output.
    - The input channel is closed on every exit path, including when an
      exception escapes while reading or printing.

    @raise Sys_error if the file cannot be opened or read. *)
let execute_query = function
  | Select (projection, Table table_file) ->
    (* Compile the separator once instead of once per row. *)
    let separator = Str.regexp "," in
    let split_row line = Str.split_delim separator line in
    let print_projected indexed_projection values =
      print_endline (String.concat "," (project values indexed_projection))
    in
    let in_channel = open_in table_file in
    let print_rows () =
      match input_line in_channel with
      | exception End_of_file -> () (* empty file: nothing to print *)
      | header_line ->
        let header_row = split_row header_line in
        let indexed_projection = get_indexed_projection header_row projection in
        print_projected indexed_projection header_row;
        (try
           while true do
             print_projected indexed_projection (split_row (input_line in_channel))
           done
         with End_of_file -> ())
    in
    (match print_rows () with
     | () -> close_in in_channel
     | exception error ->
       (* Release the descriptor without masking the original failure. *)
       close_in_noerr in_channel;
       raise error)
34 changes: 34 additions & 0 deletions QueryParser.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
open MParser
open Query

(* Parses exactly ONE whitespace character: whatever MParser's [blank]
   accepts, or a newline. It does not skip runs of whitespace by itself. *)
let whitespace = blank <|> newline

(* A character allowed inside a column name: a letter, a digit or an
   underscore. The underscore is required for headers such as [office_id]
   in samples/employees.csv, which could not be selected otherwise. *)
let identifier_char = alphanum <|> char '_'

(* Parses a table name, i.e. the path of a CSV file. Besides identifier
   characters and '.', the characters '_', '-' and '/' are accepted so that
   names like [samples/employees.csv] or [my-data_2017.csv] can be queried. *)
let tablename =
  many1_chars
    (identifier_char <|> char '.' <|> char '_' <|> char '-' <|> char '/')

(* Parses one entry of the select list: either a column name made of one or
   more identifier characters, or the literal "*". The result is the matched
   text. *)
let column_spec = (many1_chars identifier_char) <|> (string "*")

(* [csv p] parses one or more occurrences of [p] separated by commas and
   returns the parsed values in order.

   Any amount of whitespace (including none) may follow a comma, so "a,b",
   "a, b" and "a,   b" are all accepted. Requiring exactly one whitespace
   character would reject the first and the last form. *)
let csv p =
  let next_item = skip_char ',' >> skip_many whitespace >> p in
  p >>= fun parsed_head ->
  many next_item >>= fun parsed_rest ->
  return (parsed_head :: parsed_rest)

(* Converts the raw select list into a projection: the single entry "*"
   means [All]; any other list becomes [Columns], with one [Column] per
   entry in the same order. *)
let strings_to_projection names =
  match names with
  | ["*"] -> All
  | _ -> Columns (List.map (fun name -> Column name) names)

(* Parser for a complete query of the form

     select <column list> from <table name>

   - Keywords are matched literally, in lowercase.
   - Tokens may be separated by one or more whitespace characters, and
     whitespace is allowed before the query and after the table name.
   - The whole input must be consumed: trailing text (for example an
     unsupported "where" clause) is a parse error rather than being silently
     ignored. *)
let query_parser =
  skip_many whitespace >>
  skip_string "select" >>
  skip_many1 whitespace >>
  csv column_spec >>= fun column_names ->
  skip_many1 whitespace >>
  skip_string "from" >>
  skip_many1 whitespace >>
  tablename >>= fun tn ->
  skip_many whitespace >>
  eof >>
  let projection = strings_to_projection column_names in
  return (Query.Select (projection, Query.Table tn))

(* Runs [query_parser] over the given query text, with [()] as the parser's
   user state, and returns MParser's result value. *)
let parse_query query_text =
  let initial_state = () in
  parse_string query_parser query_text initial_state
6 changes: 6 additions & 0 deletions QueryParser.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
open MParser
open Query

(** Parser that turns query text into a {!Query.query}. It carries no user
    state (the state type is [unit]). *)
val query_parser : (query, unit) MParser.t

(** [parse_query s] parses the query text [s] and returns MParser's result:
    the parsed {!Query.query} on success, or the failure information
    otherwise. *)
val parse_query : string -> query MParser.result
30 changes: 30 additions & 0 deletions QueryParserTest.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
open MParser
open OUnit2
open Query
open QueryParser

(* NOTE(review): this exception is neither raised nor matched anywhere in
   this file; it looks like a leftover and is a candidate for removal. *)
exception Parse_failure of string

(* Extracts the parsed value from an MParser result. A failed parse aborts
   the running test with the parser's error message. *)
let unwrap_result result =
  match result with
  | Failed (message, _) -> assert_failure message
  | Success value -> value

(* [parse_test query_string expected_query] builds an OUnit2 test function
   that parses [query_string] and checks the result against
   [expected_query].

   [assert_equal] takes the expected value first and the actual value
   second; passing [print_query] as the printer lets OUnit2 report both
   values on failure without a hand-built message. *)
let parse_test query_string expected_query test_ctxt =
  let actual_query =
    unwrap_result (parse_string query_parser query_string ())
  in
  assert_equal ~ctxt:test_ctxt ~printer:print_query expected_query actual_query

(* "select *" must parse to the [All] projection over the named file. *)
let parse_simple_all_projection_query =
  let expected = Select (All, Table "helloworld.csv") in
  parse_test "select * from helloworld.csv" expected

(* A comma-separated select list must parse to [Columns], preserving the
   order in which the columns were written. *)
let parse_simple_projection_query =
  let expected_columns = Columns [Column "hello"; Column "world"] in
  let expected = Select (expected_columns, Table "helloworld.csv") in
  parse_test "select hello, world from helloworld.csv" expected

(* All parser tests, grouped under a single labelled suite. *)
let suite =
  let cases =
    [ "parsing a simple * query" >:: parse_simple_all_projection_query;
      "parsing a simple query with projection" >:: parse_simple_projection_query ]
  in
  "select query suite" >::: cases

(* Test executable entry point: hands the suite to OUnit2's runner. *)
let () =
run_test_tt_main suite
47 changes: 45 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,45 @@
# csv-query
A command line tool for performing SQL queries on CSV files
# csv_query - A command line tool for performing SQL queries on CSV files

Simple usage:

```
make
./csv_query.native 'select * from employees.csv'
```

## How to build

Run `make` to run the default target which builds the csv_query.native binary.

### Dependencies

1. mparser
2. ounit (for testing)
3. str

I still need to figure out how to extract the dependency specification to a file and which command to use to install the dependencies automatically. For now I installed them manually using opam, eg. `opam install mparser`, on my own machine.

## How to test

Run `make test` to compile and run the test suite.

## How does it work?

The query string is parsed using a simple monadic parser constructed using [MParser](https://github.com/cakeplus/mparser). The code for the parser is located in QueryParser.ml. The parser returns a `query` data structure which can be found in Query.ml. The `query` can be executed by the `execute_query` function in QueryExecutor.ml to produce output based on the given SQL query.

## Supported SQL features

- SELECT *
- SELECT a, b
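- csv files as table names, eg. FROM employees.csv

Note: keywords are currently matched case-sensitively, so in an actual query `select` and `from` must be written in lowercase.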

## Future work

- Add more SQL features (WHERE, JOIN, aggregate functions)
- Add an easier way to fetch and install the dependencies
- Add property based testing which would automatically generate a Query data value, print it as a string and then parse it back and check that the parsed query matches the original randomly generated one
- Improve build process (I'm terrible at writing Makefiles)

## Dev environment

I use VSCode with the OCaml plugin and Merlin. The code is built using `ocamlbuild` and `ocamlfind`
1 change: 1 addition & 0 deletions _tags
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
true: package(ounit), package(mparser), package(str)
11 changes: 11 additions & 0 deletions csv_query.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
open MParser
open QueryParser
open QueryExecutor

(* Command line entry point: parses the query given as the first argument
   and executes it.

   - Without a query argument a usage message is printed instead of failing
     with an array index error.
   - Parse errors and file errors are written to standard error, and the
     process exits with status 1 so that scripts can detect the failure.
   - Arguments after the first are ignored. *)
let () =
  if Array.length Sys.argv < 2 then begin
    prerr_endline ("Usage: " ^ Sys.argv.(0) ^ " '<query>'");
    exit 1
  end;
  let query_string = Sys.argv.(1) in
  match parse_query query_string with
  | Success parsed_query ->
    (try execute_query parsed_query
     with Sys_error reason ->
       (* Typically a missing or unreadable CSV file. *)
       prerr_endline reason;
       exit 1)
  | Failed (message, _) ->
    prerr_string message;
    exit 1
3 changes: 3 additions & 0 deletions samples/employees.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
name,salary,office_id
alice,2000000,1
bob,1000000,1

0 comments on commit bb43004

Please sign in to comment.