-
Notifications
You must be signed in to change notification settings - Fork 1
/
Trace_to_csv.py
29 lines (21 loc) · 1.32 KB
/
Trace_to_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import pandas
# Convert an OpenXC drive trace (JSON) into a CSV file with PySpark.
#
# NOTE: `sqlContext` is not created in this script; it must already exist
# in the session that runs it (presumably a PySpark shell or notebook --
# confirm for your environment).

# store filename and path in variable "filename"
filename = "MY-OPENXC-DRIVE-TRACE-FILENAME.json"
# destination of the exported CSV
output_file = 'MY-OUTPUT-CSV-FILE.csv'
# Pick exactly ONE writer. Running both against the same path exports the
# data twice, and the spark-csv save (default mode: fail if the path
# exists) can abort because the pandas writer has just created the file.
#   False -> collect to the driver and write with pandas (default)
#   True  -> write with the spark-csv data source
USE_SPARK_CSV = False

# read into dataframe
testJsonData = sqlContext.read.format('json').load(filename)
# register dataframe as a table so it can be queried with SQL
sqlContext.registerDataFrameAsTable(testJsonData, "table1")

### this example extracts engine speed and transmission gear position...use the general approach to extract any desired variable
engine_speed = sqlContext.sql(
    "SELECT timestamp, DOUBLE(value) as engine_speed FROM table1 WHERE name = 'engine_speed'"
)
# the timestamp column is aliased to "timestamp2" so the two sides of the
# join below have distinct column names
transmission_gear_position = sqlContext.sql(
    "SELECT timestamp as timestamp2, value as transmission_gear_position FROM table1 WHERE name = 'transmission_gear_position'"
)

# the two variables are joined by timestamp; an inner join keeps only the
# timestamps that appear in both result sets
Shifting = engine_speed.join(
    transmission_gear_position,
    engine_speed.timestamp == transmission_gear_position.timestamp2,
    "inner",
)

if USE_SPARK_CSV:
    # alternatively, use spark-csv
    #
    # if taking this approach and using a version of PySpark that differs from 1.6.1,
    # refer to http://stackoverflow.com/questions/31385363/how-to-export-a-table-dataframe-in-pyspark-to-csv
    # to determine the syntax that will be needed
    Shifting.write.format('com.databricks.spark.csv').save(output_file)
else:
    # use Pandas to write the file to a local csv file; toPandas() pulls
    # the whole joined result onto the driver first
    Shifting.toPandas().to_csv(output_file)