From fe4df9ecd97f44c2365763f9cc13f8c08f934918 Mon Sep 17 00:00:00 2001 From: Tobias Pfeiffer Date: Mon, 29 Jun 2015 14:10:35 +0900 Subject: [PATCH] release v1.3.0 --- Changelog.md | 68 + README.md | 34 +- gateway/build.sbt | 2 +- .../jubaql_server/gateway/GatewayPlan.scala | 13 +- .../jubaql_server/gateway/JubaQLGateway.scala | 41 +- .../src/test/resources/processor-logfile.jar | Bin 728 -> 754 bytes .../jubaql_server/gateway/GatewayServer.scala | 4 +- increase-version.sh | 22 + processor/NOTICE_SPARK | 574 +++ processor/assembly.sbt | 22 +- processor/build.sbt | 38 +- processor/project/assembly.sbt | 2 +- processor/project/build.properties | 1 + processor/project/deptree.sbt | 2 +- .../src/main/resources/log4j-spark-submit.xml | 70 + processor/src/main/resources/log4j.xml | 14 +- .../dstream/OrderedFileInputDStream.scala | 39 +- .../processor/AggregateFunctions.scala | 281 ++ .../processor/DatumExtractor.scala | 282 ++ .../processor/HandleExceptions.scala | 2 + .../jubaql_server/processor/Helpers.scala | 47 + .../processor/HybridProcessor.scala | 261 +- .../processor/JavaScriptHelpers.scala | 150 + .../processor/JavaScriptUDFManager.scala | 131 + .../jubaql_server/processor/JubaQLAST.scala | 42 +- ...erPrediction.scala => JubaQLContext.scala} | 7 +- .../processor/JubaQLParser.scala | 324 +- .../processor/JubaQLPatternLayout.scala | 46 + .../processor/JubaQLProcessor.scala | 16 +- .../processor/JubaQLService.scala | 1484 +++++-- .../processor/PreparedJubaQLStatement.scala | 60 + .../processor/RegistrationHandler.scala | 2 +- .../processor/SchemaDStream.scala | 242 ++ .../processor/SlidingWindow.scala | 655 +++ .../processor/StringWrapper.scala | 3 + ...sifierResult.scala => AnalyzeResult.scala} | 14 + ...{AnomalyScore.scala => ErrorMessage.scala} | 2 +- ...DatumResult.scala => JubaQLResponse.scala} | 15 +- .../processor/json/Register.scala | 2 +- .../processor/json/Unregister.scala | 2 +- .../processor/udf/AggregateFunctions.scala | 413 ++ .../udf/OrderedValueRDDFunctions.scala | 81 + .../processor/updater/Anomaly.scala | 99 +- .../processor/updater/Classifier.scala | 150 +- .../processor/updater/HttpClientPerJvm.scala | 39 +- .../processor/updater/JubatusClient.scala | 154 + .../processor/updater/Recommender.scala | 173 +- .../processor/updater/Updater.scala | 54 - processor/src/test/resources/dummydata/1.json | 2 - processor/src/test/resources/dummydata/2.json | 2 - .../src/test/resources/dummydata/data.json | 12 + processor/src/test/resources/lof.json | 16 - .../test/resources/npb_similar_player.json | 12 - processor/src/test/resources/shogun.json | 14 - .../src/test/resources/shogun_alpha_data.json | 44 + processor/src/test/resources/shogun_full.json | 20 + .../resources/shogun_splitted_name_data.json | 44 + .../org/apache/spark/LocalSparkContext.scala | 65 + .../org/apache/spark/SharedSparkContext.scala | 42 + .../apache/spark/sql/json/JsonRDDCopy.scala | 398 ++ .../processor/AggregateFunctionSpec.scala | 223 + .../processor/HasKafkaPath.scala | 2 +- .../processor/HybridProcessorSpec.scala | 447 +- .../processor/JavaScriptSpec.scala | 264 ++ .../processor/JubaQLExtractorSpec.scala | 504 +++ .../processor/JubaQLParserSpec.scala | 344 +- .../processor/JubaQLProcessorSpec.scala | 564 --- .../processor/JubaQLServiceHelperSpec.scala | 29 +- .../LocalJubatusApplicationSpec.scala | 23 + .../jubaql_server/processor/ProcessUtil.scala | 12 + .../processor/RegistrationSpec.scala | 14 +- .../processor/SchemaDStreamSpec.scala | 177 + .../processor/SlidingStreamSpec.scala | 278 ++ 
.../jubaql_server/processor/TestTags.scala | 3 + .../integration/JubaQLProcessorSpec.scala | 3839 +++++++++++++++++ .../udf/AggregateFunctionsTest.scala | 388 ++ .../udf/ReferenceImplementation.scala | 157 + .../udf/ReferenceImplementationTest.scala | 143 + 78 files changed, 13029 insertions(+), 1232 deletions(-) create mode 100644 Changelog.md create mode 100755 increase-version.sh create mode 100644 processor/NOTICE_SPARK create mode 100644 processor/project/build.properties create mode 100644 processor/src/main/resources/log4j-spark-submit.xml create mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/AggregateFunctions.scala create mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/DatumExtractor.scala create mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/Helpers.scala create mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/JavaScriptHelpers.scala create mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/JavaScriptUDFManager.scala rename processor/src/main/scala/us/jubat/jubaql_server/processor/{json/ClassifierPrediction.scala => JubaQLContext.scala} (79%) create mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLPatternLayout.scala create mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/PreparedJubaQLStatement.scala create mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/SchemaDStream.scala create mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/SlidingWindow.scala rename processor/src/main/scala/us/jubat/jubaql_server/processor/json/{ClassifierResult.scala => AnalyzeResult.scala} (70%) rename processor/src/main/scala/us/jubat/jubaql_server/processor/json/{AnomalyScore.scala => ErrorMessage.scala} (95%) rename processor/src/main/scala/us/jubat/jubaql_server/processor/json/{DatumResult.scala => JubaQLResponse.scala} (68%) create mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/udf/AggregateFunctions.scala create mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/udf/OrderedValueRDDFunctions.scala create mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/updater/JubatusClient.scala delete mode 100644 processor/src/main/scala/us/jubat/jubaql_server/processor/updater/Updater.scala delete mode 100644 processor/src/test/resources/dummydata/1.json delete mode 100644 processor/src/test/resources/dummydata/2.json create mode 100644 processor/src/test/resources/dummydata/data.json create mode 100644 processor/src/test/resources/shogun_alpha_data.json create mode 100644 processor/src/test/resources/shogun_full.json create mode 100644 processor/src/test/resources/shogun_splitted_name_data.json create mode 100644 processor/src/test/scala/org/apache/spark/LocalSparkContext.scala create mode 100644 processor/src/test/scala/org/apache/spark/SharedSparkContext.scala create mode 100644 processor/src/test/scala/org/apache/spark/sql/json/JsonRDDCopy.scala create mode 100644 processor/src/test/scala/us/jubat/jubaql_server/processor/AggregateFunctionSpec.scala create mode 100644 processor/src/test/scala/us/jubat/jubaql_server/processor/JavaScriptSpec.scala create mode 100644 processor/src/test/scala/us/jubat/jubaql_server/processor/JubaQLExtractorSpec.scala delete mode 100644 processor/src/test/scala/us/jubat/jubaql_server/processor/JubaQLProcessorSpec.scala create mode 100644 processor/src/test/scala/us/jubat/jubaql_server/processor/ProcessUtil.scala create mode 
100644 processor/src/test/scala/us/jubat/jubaql_server/processor/SchemaDStreamSpec.scala create mode 100644 processor/src/test/scala/us/jubat/jubaql_server/processor/SlidingStreamSpec.scala create mode 100644 processor/src/test/scala/us/jubat/jubaql_server/processor/integration/JubaQLProcessorSpec.scala create mode 100644 processor/src/test/scala/us/jubat/jubaql_server/processor/udf/AggregateFunctionsTest.scala create mode 100644 processor/src/test/scala/us/jubat/jubaql_server/processor/udf/ReferenceImplementation.scala create mode 100644 processor/src/test/scala/us/jubat/jubaql_server/processor/udf/ReferenceImplementationTest.scala diff --git a/Changelog.md b/Changelog.md new file mode 100644 index 0000000..1785c54 --- /dev/null +++ b/Changelog.md @@ -0,0 +1,68 @@
+Changelog
+=========
+
+1.3.0
+-----
+
+### New Features
+
+* Cascaded Processing
+
+  * The concept of a user-defined "stream" was introduced.
+    Similar to `CREATE VIEW` in SQL, `CREATE STREAM name FROM SELECT ...`
+    allows the user to create a stream holding the results of a SELECT
+    query over some input stream.
+  * In particular, `ANALYZE` results can be added to a stream in a new
+    column and used further down in the processing pipeline.
+  * A user can define custom functions in JavaScript via the
+    `CREATE FUNCTION` statement and use them in queries.
+  * Multiple data sources can be defined and used one after another
+    for updating/analyzing a model.
+
+* Trigger-Based Action
+
+  * A user can also define functions without a return value using
+    `CREATE TRIGGER FUNCTION` and attach them as triggers on a stream
+    using `CREATE TRIGGER`. This can be used to act based on the contents
+    of a stream, in particular on analysis results.
+
+* Time-Series Analysis using Sliding Windows
+
+  * To analyze time-series data, sliding windows over an input stream
+    (based on either item count or an embedded timestamp) can be computed
+    and the data in each window aggregated using a set of provided
+    functions such as standard deviation or histogram.
+  * The results of this aggregation can be used like any other data
+    stream.
+
+* Other
+
+  * It is now possible to do feature extraction using user-defined
+    functions.
+
+### Breaking Changes
+
+* `CREATE DATASOURCE`
+
+  * A schema should now generally be provided, because in many cases
+    schema inference will lead to errors whenever an empty data batch
+    is encountered.
+
+* `CREATE MODEL`
+
+  * The syntax for specifying the label/id column has changed from
+    `model_name WITH (label: "class", datum: "name")` to
+    `model_name (label: class) AS ...`
+  * Feature converters are no longer specified in the JSON configuration,
+    but instead with a `column WITH converter` syntax.
+
+* `UPDATE MODEL`
+
+  * The statement now only establishes the connection between stream and
+    model; processing does not start yet. It is started by the separate
+    `START PROCESSING` statement.
+
+1.2.0
+-----
+
+This is the first public release. See the documentation for features and usage information.
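The statement fragments quoted in this changelog can be combined with the `CREATE DATASOURCE` / `UPDATE MODEL` / `START PROCESSING` / `ANALYZE` statements shown in the README further below. The following is only a minimal sketch of cascaded processing: the stream name `projected` and the body of the `SELECT` are illustrative assumptions, the `--` comments are annotation rather than claimed JubaQL syntax, and the trigger and sliding-window statements are omitted because their full syntax is not spelled out in this patch excerpt.

```
-- illustrative sketch only; names and SELECT body are assumptions, not tested syntax
CREATE CLASSIFIER MODEL test (label: label) AS name WITH unigram CONFIG '{"method": "AROW", "parameter": {"regularization_weight" : 1.0}}'
CREATE DATASOURCE shogun (label string, name string) FROM (STORAGE: "file://data/shogun_data.json")
-- new in 1.3.0: derive a stream from a SELECT over the data source (hypothetical SELECT body)
CREATE STREAM projected FROM SELECT label, name FROM shogun
-- UPDATE MODEL only wires stream and model together; processing starts with START PROCESSING
UPDATE MODEL test USING train FROM projected
START PROCESSING shogun
ANALYZE '{"name": "慶喜"}' BY MODEL test USING classify
```

This mirrors the README walkthrough, with the new `CREATE STREAM` step inserted between the data source definition and the model update.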
diff --git a/README.md b/README.md index 44f1780..022e236 100644 --- a/README.md +++ b/README.md @@ -6,12 +6,12 @@ How to get started with JubaQL ### Development Setup -* Get a Hadoop-enabled version of Spark 1.1.1: - `wget http://d3kbcqa49mib13.cloudfront.net/spark-1.1.1-bin-hadoop2.4.tgz` +* Get a Hadoop-enabled version of Spark 1.2.2: + `wget http://d3kbcqa49mib13.cloudfront.net/spark-1.2.2-bin-hadoop2.4.tgz` and unpack it somewhere: - `tar -xzf spark-1.1.1-bin-hadoop2.4.tgz && export SPARK_DIST="$(pwd)/spark-1.1.1-bin-hadoop2.4/"` + `tar -xzf spark-1.2.2-bin-hadoop2.4.tgz && export SPARK_DIST="$(pwd)/spark-1.2.2-bin-hadoop2.4/"` * Install Jubatus. -* Get JubaQLClient and JubaQLServer (consists of JubaQLProcessor and JubaQLGateway): +* Get JubaQL-Client and JubaQL-Server: `git clone https://github.com/jubatus/jubaql-client.git` `git clone https://github.com/jubatus/jubaql-server.git` * Build the JubaQL components: @@ -22,16 +22,17 @@ How to get started with JubaQL * JubaQLGateway: `cd jubaql-server/gateway && sbt assembly && cd ../..` * Start the JubaQLGateway: - `cd jubaql-server && java -Dspark.distribution="$SPARK_DIST" -Djubaql.processor.fatjar=processor/target/scala-2.10/jubaql-processor-assembly-1.2.0.jar -jar gateway/target/scala-2.10/jubaql-gateway-assembly-1.2.0.jar -i 127.0.0.1` + `cd jubaql-server && java -Dspark.distribution="$SPARK_DIST" -Djubaql.processor.fatjar=processor/target/scala-2.10/jubaql-processor-assembly-1.3.0.jar -jar gateway/target/scala-2.10/jubaql-gateway-assembly-1.3.0.jar -i 127.0.0.1` * In a different shell, start the JubaQLClient: `./jubaql-client/target/start` -* You will see the prompt `jubaql>` in the shell and you will in fact be able to type your commands there, but until the JubaQLProcessor is up and running correctly, you will get the message "Unexpected response status: 503". +* You will see the prompt `jubaql>` in the shell and you will in fact be able to type your commands there, but until the JubaQLProcessor is up and running correctly, you will see the message: "This session has not been registered. Wait a second." In order to test that your setup is working correctly, you can do a simple classification using the data from the [shogun example](https://github.com/jubatus/jubatus-example/tree/master/shogun). Run the following JubaQL commands in the client: -* `CREATE CLASSIFIER MODEL test WITH (label: "label", datum: "name") config = '{"method": "AROW","converter": { "num_filter_types": {}, "num_filter_rules": [], "string_filter_types": {}, "string_filter_rules": [], "num_types": {}, "num_rules": [],"string_types": {"unigram": { "method": "ngram", "char_num": "1" }},"string_rules": [{ "key": "*", "type": "unigram", "sample_weight": "bin", "global_weight": "bin" } ]},"parameter": {"regularization_weight" : 1.0}}'` +* `CREATE CLASSIFIER MODEL test (label: label) AS name WITH unigram CONFIG '{"method": "AROW", "parameter": {"regularization_weight" : 1.0}}'` * `CREATE DATASOURCE shogun (label string, name string) FROM (STORAGE: "file://data/shogun_data.json")` * `UPDATE MODEL test USING train FROM shogun` +* `START PROCESSING shogun` * `ANALYZE '{"name": "慶喜"}' BY MODEL test USING classify` * `SHUTDOWN` @@ -42,7 +43,7 @@ The JSON returned by the `ANALYZE` statement should indicate that the label "徳 * Set up a Hadoop cluster with YARN and HDFS in place. * Install Jubatus on all cluster nodes. * Get JubaQL and compile it as described above. (This time, Jubatus is not required locally.) 
-* Install the [Jubatus on YARN](https://github.com/jubatus/jubatus-on-yarn) libraries in HDFS as described in [the instructions](https://github.com/jubatus/jubatus-on-yarn/blob/master/document/%E3%83%93%E3%83%AB%E3%83%89%E3%83%BB%E5%88%A9%E7%94%A8%E6%89%8B%E9%A0%86%E6%9B%B8.md#%E5%AE%9F%E8%A1%8C%E3%81%AB%E5%BF%85%E8%A6%81%E3%81%AA%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB%E3%81%AE%E6%BA%96%E5%82%99). Make sure that the HDFS directory `/jubatus-on-yarn/application-master/jubaconfig/` exists and is writeable by the user running the JubaQLProcessor application. +* Install the [Jubatus on YARN](https://github.com/jubatus/jubatus-on-yarn) libraries in HDFS as described in [the instructions](https://github.com/jubatus/jubatus-on-yarn/blob/master/document/instruction.md#required-files). Make sure that the HDFS directory `/jubatus-on-yarn/application-master/jubaconfig/` exists and is writeable by the user running the JubaQLProcessor application. * To test the setup, also copy the file `shogun-data.json` from the JubaQL source tree's `data/` directory to `/jubatus-on-yarn/sample/shogun_data.json` in HDFS. * Copy the files `core-site.xml`, `yarn-site.xml`, `hdfs-site.xml` containing your Hadoop setup description from one of your cluster nodes to some directory and point the environment variable `HADOOP_CONF_DIR` to that directory. * Get your local computer's IP address that points towards the cluster. On Linux, given the IP address of one of your cluster nodes, this should be possible with something like: @@ -50,26 +51,29 @@ The JSON returned by the `ANALYZE` statement should indicate that the label "徳 Make sure that this IP address can be connected to from the cluster nodes and no firewall rules etc. are blocking access. * Get the addresses of your Zookeeper nodes and concatenate their `host:port` locations with a comma: `export MY_ZOOKEEPER=zk1:2181,zk2:2181` +* Locate a temporary directory in HDFS that Spark can use for checkpointing: + `export CHECKPOINT=hdfs:///tmp/spark` * Start the JubaQLGateway: - `cd jubaql-server` - `java -Drun.mode=production -Djubaql.zookeeper=$MY_ZOOKEEPER -Dspark.distribution="$SPARK_DIST" -Djubaql.processor.fatjar=processor/target/scala-2.10/jubaql-processor-assembly-1.2.0.jar -jar gateway/target/scala-2.10/jubaql-gateway-assembly-1.2.0.jar -i $MY_IP` + `cd jubaql-server` + `java -Drun.mode=production -Djubaql.checkpointdir=$CHECKPOINT -Djubaql.zookeeper=$MY_ZOOKEEPER -Dspark.distribution="$SPARK_DIST" -Djubaql.processor.fatjar=processor/target/scala-2.10/jubaql-processor-assembly-1.3.0.jar -jar gateway/target/scala-2.10/jubaql-gateway-assembly-1.3.0.jar -i $MY_IP` * In a different shell, start the JubaQLClient: `./jubaql-client/target/start` -* You will see the prompt `jubaql>` in the shell and you will in fact be able to type your commands there, but until the JubaQLProcessor is up and running correctly, you will get the message "Unexpected response status: 503". +* You will see the prompt `jubaql>` in the shell and you will in fact be able to type your commands there, but until the JubaQLProcessor is up and running correctly, you will see the message: "This session has not been registered. Wait a second." In order to test that your setup is working correctly, you can do a simple classification using the `shogun-data.json` file you copied to HDFS before. 
 Run the following JubaQL commands in the client:
-* `CREATE CLASSIFIER MODEL test WITH (label: "label", datum: "name") config = '{"method": "AROW","converter": { "num_filter_types": {}, "num_filter_rules": [], "string_filter_types": {}, "string_filter_rules": [], "num_types": {}, "num_rules": [],"string_types": {"unigram": { "method": "ngram", "char_num": "1" }},"string_rules": [{ "key": "*", "type": "unigram", "sample_weight": "bin", "global_weight": "bin" } ]},"parameter": {"regularization_weight" : 1.0}}'`
+* `CREATE CLASSIFIER MODEL test (label: label) AS name WITH unigram CONFIG '{"method": "AROW", "parameter": {"regularization_weight" : 1.0}}'`
 * `CREATE DATASOURCE shogun (label string, name string) FROM (STORAGE: "hdfs:///jubatus-on-yarn/sample/shogun_data.json")`
 * `UPDATE MODEL test USING train FROM shogun`
+* `START PROCESSING shogun`
 * `ANALYZE '{"name": "慶喜"}' BY MODEL test USING classify`
 * `SHUTDOWN`
-The JSON returned by the `ANALYZE` statement should indicate that the label "徳川" has the highest score.
+The JSON returned by the `ANALYZE` statement should indicate that the label "徳川" has the highest score. Note that the score may differ from the one obtained in the development setup, since multiple Jubatus instances are used for training.
 Note:
-* When the JubaQLProcessor is started using `spark-submit` as outlined above, it will first upload the `spark-assembly-1.1.1-hadoop2.4.0.jar` and `jubaql-processor-assembly-1.2.0.jar` to the cluster and add them to HDFS, from where they will be downloaded by each executor.
-* It is possible to skip the upload of the Spark libraries by copying the Spark jar file to HDFS manually and adding the parameter `-Dspark.yarn.jar=hdfs:///path/to/spark-assembly-1.1.1-hadoop2.4.0.jar` when starting the JubaQLGateway.
+
+* When the JubaQLProcessor is started, first the files `spark-assembly-1.2.2-hadoop2.4.0.jar` and `jubaql-processor-assembly-1.3.0.jar` will be uploaded to the cluster and added to HDFS, from where they will be downloaded by each executor. It is possible to skip the upload of the Spark libraries by copying the Spark jar file to HDFS manually and adding the parameter `-Dspark.yarn.jar=hdfs:///path/to/spark-assembly-1.2.2-hadoop2.4.0.jar` when starting the JubaQLGateway.
 * In theory, it is also possible to do the same for the JubaQLProcessor application jar file. However, at the moment we rely on extracting a `log4j.xml` file from that jar locally before upload, so there is no support for also storing that file in HDFS, yet.
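For illustration, the gateway start command from the cluster setup above, extended with that parameter (the HDFS path is a placeholder, as in the note), would look like:
  `java -Drun.mode=production -Djubaql.checkpointdir=$CHECKPOINT -Djubaql.zookeeper=$MY_ZOOKEEPER -Dspark.distribution="$SPARK_DIST" -Dspark.yarn.jar=hdfs:///path/to/spark-assembly-1.2.2-hadoop2.4.0.jar -Djubaql.processor.fatjar=processor/target/scala-2.10/jubaql-processor-assembly-1.3.0.jar -jar gateway/target/scala-2.10/jubaql-gateway-assembly-1.3.0.jar -i $MY_IP`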
### Run on YARN with remote gateway diff --git a/gateway/build.sbt b/gateway/build.sbt index bb46465..7a816fd 100644 --- a/gateway/build.sbt +++ b/gateway/build.sbt @@ -1,6 +1,6 @@ name := "JubaQL Gateway" -version := "1.2.0" +version := "1.3.0" // use an older version than necessary to use the same set of dependencies // across projects diff --git a/gateway/src/main/scala/us/jubat/jubaql_server/gateway/GatewayPlan.scala b/gateway/src/main/scala/us/jubat/jubaql_server/gateway/GatewayPlan.scala index 3f41df9..7f30aec 100644 --- a/gateway/src/main/scala/us/jubat/jubaql_server/gateway/GatewayPlan.scala +++ b/gateway/src/main/scala/us/jubat/jubaql_server/gateway/GatewayPlan.scala @@ -39,7 +39,8 @@ import java.nio.file.{StandardCopyOption, Files} @io.netty.channel.ChannelHandler.Sharable class GatewayPlan(ipAddress: String, port: Int, envpForProcessor: Array[String], runMode: RunMode, - sparkDistribution: String, fatjar: String) + sparkDistribution: String, fatjar: String, + checkpointDir: String) extends cycle.Plan /* With cycle.SynchronousExecution, there is a group of N (16?) threads (named "nioEventLoopGroup-5-*") that will process N requests in @@ -77,7 +78,7 @@ class GatewayPlan(ipAddress: String, port: Int, */ val tmpLog4jPath: String = try { val jar = new JarFile(new File(fatjar)) - val log4jFile = jar.getEntry("log4j.xml") + val log4jFile = jar.getEntry("log4j-spark-submit.xml") val log4jIs = jar.getInputStream(log4jFile) val tmpFile = File.createTempFile("log4j", ".xml") Files.copy(log4jIs, tmpFile.toPath, StandardCopyOption.REPLACE_EXISTING) @@ -88,7 +89,7 @@ class GatewayPlan(ipAddress: String, port: Int, logger.error("failed to create temporary log4j.xml copy: " + e.getMessage) throw e } - logger.debug("extracted log4j.xml file to %s".format(tmpLog4jPath)) + logger.debug("extracted log4j-spark-submit.xml file to %s".format(tmpLog4jPath)) val errorMsgContentType = ContentType("text/plain; charset=utf-8") @@ -131,7 +132,8 @@ class GatewayPlan(ipAddress: String, port: Int, // double-escaped on their way to the Spark driver and probably never end // up there. 
cmd.update(6, "spark.driver.extraJavaOptions=-Drun.mode=production " + - s"-Djubaql.zookeeper=$zookeeper") // --conf + s"-Djubaql.zookeeper=$zookeeper " + + s"-Djubaql.checkpointdir=$checkpointDir") // --conf // also specify the location of the Spark jar file, if given val sparkJarParams = sparkJar match { case Some(url) => "--conf" :: s"spark.yarn.jar=$url" :: Nil @@ -152,7 +154,7 @@ class GatewayPlan(ipAddress: String, port: Int, val isr = new InputStreamReader(is) val br = new BufferedReader(isr) var line: String = br.readLine() - while (line != null && line.trim != "yarnAppState: RUNNING") { + while (line != null && !line.trim.contains("state: RUNNING")) { if (line.contains("Exception")) { logger.error(line) throw new RuntimeException("could not start spark-submit") @@ -167,6 +169,7 @@ class GatewayPlan(ipAddress: String, port: Int, case RunMode.Development(numThreads) => cmd.update(4, s"local[$numThreads]") // --master cmd.update(6, "run.mode=development") // --conf + cmd.insertAll(7, Seq("--conf", s"jubaql.checkpointdir=$checkpointDir")) logger.debug("executing: " + cmd.mkString(" ")) Try { diff --git a/gateway/src/main/scala/us/jubat/jubaql_server/gateway/JubaQLGateway.scala b/gateway/src/main/scala/us/jubat/jubaql_server/gateway/JubaQLGateway.scala index 62817f9..ff4be71 100644 --- a/gateway/src/main/scala/us/jubat/jubaql_server/gateway/JubaQLGateway.scala +++ b/gateway/src/main/scala/us/jubat/jubaql_server/gateway/JubaQLGateway.scala @@ -80,17 +80,13 @@ object JubaQLGateway extends LazyLogging { } logger.info("Starting in run mode %s".format(runMode)) - val sparkDistribution: String = System.getProperty("spark.distribution") - if (sparkDistribution == null || sparkDistribution.trim.isEmpty) { - System.err.println("No spark.distribution property") - System.exit(1) - } - val fatjar: String = System.getProperty("jubaql.processor.fatjar") - if (fatjar == null || fatjar.trim.isEmpty) { - System.err.println("No jubaql.processor.fatjar") - System.exit(1) - } - val plan = new GatewayPlan(ipAddress, port, envp, runMode, sparkDistribution, fatjar) + val sparkDistribution: String = getPropertyOrExitIfEmpty("spark.distribution") + val fatjar: String = getPropertyOrExitIfEmpty("jubaql.processor.fatjar") + val checkpointDir = getCheckpointDir(runMode) + val plan = new GatewayPlan(ipAddress, port, envp, runMode, + sparkDistribution = sparkDistribution, + fatjar = fatjar, + checkpointDir = checkpointDir) val nettyServer = unfiltered.netty.Server.http(port).plan(plan) logger.info("JubaQLGateway starting") nettyServer.run() @@ -114,6 +110,29 @@ object JubaQLGateway extends LazyLogging { parser.parse(args, CommandlineOptions()) } + + private def getPropertyOrExitIfEmpty(name: String): String = { + val prop = scala.util.Properties.propOrElse(name, "") + if (prop.trim.isEmpty) { + System.err.println(s"No ${name} property") + System.exit(1) + } + prop + } + + private def getCheckpointDir(runMode: RunMode): String = { + val dir = scala.util.Properties.propOrElse("jubaql.checkpointdir", "") + if (dir.trim.isEmpty) { + runMode match { + case RunMode.Production(_, _, _, _) => + "hdfs:///tmp/spark" + case RunMode.Development(_) => + "file:///tmp/spark" + } + } else { + dir + } + } } case class CommandlineOptions(ip: String = "", port: Int = JubaQLGateway.defaultPort) diff --git a/gateway/src/test/resources/processor-logfile.jar b/gateway/src/test/resources/processor-logfile.jar index 050da0bd9889ddb0f0d712c5ad0e75f4b86066fa..4ec47efbae310ab76e7275f7f16a7ff3232bdc91 100644 GIT binary patch delta 68 
zcmcb?`iXUdl$aQU3`0(Sx=EI9aY15Hwr+80Qf_9+M&%qPMzP5gn8bKd#3wIf(%_0= LU|?uq0ul@W>6{a> delta 59 zcmeywdV_U>6elNx3`0(Sx=GeXgB&JtP6i1E5MTu2EsY=+SZ?wGCJiosAg`DSBnAMA CISX|F
diff --git a/gateway/src/test/scala/us/jubat/jubaql_server/gateway/GatewayServer.scala b/gateway/src/test/scala/us/jubat/jubaql_server/gateway/GatewayServer.scala index 7743057..d1d531a 100644 --- a/gateway/src/test/scala/us/jubat/jubaql_server/gateway/GatewayServer.scala +++ b/gateway/src/test/scala/us/jubat/jubaql_server/gateway/GatewayServer.scala @@ -22,7 +22,9 @@ trait GatewayServer extends BeforeAndAfterAll { protected val plan = new GatewayPlan("example.com", 1234, Array(), RunMode.Test,
- "", "src/test/resources/processor-logfile.jar")
+ sparkDistribution = "",
+ fatjar = "src/test/resources/processor-logfile.jar",
+ checkpointDir = "file:///tmp/spark")
 protected val server = unfiltered.netty.Server.http(9877).plan(plan) override protected def beforeAll() = {
diff --git a/increase-version.sh b/increase-version.sh new file mode 100755 index 0000000..b96d53a --- /dev/null +++ b/increase-version.sh @@ -0,0 +1,22 @@
+#!/bin/bash
+
+if [ $# -ne 1 ]; then
+  echo "Usage: increase-version.sh toversion"
+  exit 1
+fi
+
+OLDVERSION=$(grep "version := " processor/build.sbt | sed 's/[^"]*"\([^"]*\).*/\1/')
+NEWVERSION=$1
+
+echo "Bumping version from $OLDVERSION to $NEWVERSION ..."
+
+sed -i "s/$OLDVERSION/$NEWVERSION/g" */build.sbt
+
+sed -i "s/$OLDVERSION/$NEWVERSION/g" README.md
+
+echo "Checking for old occurrences of $OLDVERSION ..."
+
+grep -F -R "$OLDVERSION" */src
+grep -F --directories=skip "$OLDVERSION" */*
+grep -F --directories=skip "$OLDVERSION" *
+
diff --git a/processor/NOTICE_SPARK b/processor/NOTICE_SPARK new file mode 100644 index 0000000..452aef2 --- /dev/null +++ b/processor/NOTICE_SPARK @@ -0,0 +1,574 @@
+Apache Spark
+Copyright 2014 The Apache Software Foundation.
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+
+========================================================================
+Common Development and Distribution License 1.0
+========================================================================
+
+The following components are provided under the Common Development and Distribution License 1.0. See project link for details.
+
+ (CDDL 1.0) Glassfish Jasper (org.mortbay.jetty:jsp-2.1:6.1.14 - http://jetty.mortbay.org/project/modules/jsp-2.1)
+ (CDDL 1.0) Servlet Specification 2.5 API (org.mortbay.jetty:servlet-api-2.5:6.1.14 - http://jetty.mortbay.org/project/modules/servlet-api-2.5)
+ (COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0) (GNU General Public Library) Streaming API for XML (javax.xml.stream:stax-api:1.0-2 - no url defined)
+ (Common Development and Distribution License (CDDL) v1.0) JavaBeans Activation Framework (JAF) (javax.activation:activation:1.1 - http://java.sun.com/products/javabeans/jaf/index.jsp)
+
+========================================================================
+Common Development and Distribution License 1.1
+========================================================================
+
+The following components are provided under the Common Development and Distribution License 1.1. See project link for details.
+ + (CDDL 1.1) (GPL2 w/ CPE) JAXB API bundle for GlassFish V3 (javax.xml.bind:jaxb-api:2.2.2 - https://jaxb.dev.java.net/) + (CDDL 1.1) (GPL2 w/ CPE) JAXB RI (com.sun.xml.bind:jaxb-impl:2.2.3-1 - http://jaxb.java.net/) + (CDDL 1.1) (GPL2 w/ CPE) jersey-core (com.sun.jersey:jersey-core:1.8 - https://jersey.dev.java.net/jersey-core/) + (CDDL 1.1) (GPL2 w/ CPE) jersey-core (com.sun.jersey:jersey-core:1.9 - https://jersey.java.net/jersey-core/) + (CDDL 1.1) (GPL2 w/ CPE) jersey-guice (com.sun.jersey.contribs:jersey-guice:1.9 - https://jersey.java.net/jersey-contribs/jersey-guice/) + (CDDL 1.1) (GPL2 w/ CPE) jersey-json (com.sun.jersey:jersey-json:1.8 - https://jersey.dev.java.net/jersey-json/) + (CDDL 1.1) (GPL2 w/ CPE) jersey-json (com.sun.jersey:jersey-json:1.9 - https://jersey.java.net/jersey-json/) + (CDDL 1.1) (GPL2 w/ CPE) jersey-server (com.sun.jersey:jersey-server:1.8 - https://jersey.dev.java.net/jersey-server/) + (CDDL 1.1) (GPL2 w/ CPE) jersey-server (com.sun.jersey:jersey-server:1.9 - https://jersey.java.net/jersey-server/) + +======================================================================== +Common Public License 1.0 +======================================================================== + +The following components are provided under the Common Public 1.0 License. See project link for details. + + (Common Public License Version 1.0) JUnit (junit:junit-dep:4.10 - http://junit.org) + (Common Public License Version 1.0) JUnit (junit:junit:3.8.1 - http://junit.org) + (Common Public License Version 1.0) JUnit (junit:junit:4.8.2 - http://junit.org) + +======================================================================== +Eclipse Public License 1.0 +======================================================================== + +The following components are provided under the Eclipse Public License 1.0. See project link for details. + + (Eclipse Public License - Version 1.0) mqtt-client (org.eclipse.paho:mqtt-client:0.4.0 - http://www.eclipse.org/paho/mqtt-client) + (Eclipse Public License v1.0) Eclipse JDT Core (org.eclipse.jdt:core:3.1.1 - http://www.eclipse.org/jdt/) + +======================================================================== +Mozilla Public License 1.0 +======================================================================== + +The following components are provided under the Mozilla Public License 1.0. See project link for details. + + (GPL) (LGPL) (MPL) JTransforms (com.github.rwl:jtransforms:2.4.0 - http://sourceforge.net/projects/jtransforms/) + (Mozilla Public License Version 1.1) jamon-runtime (org.jamon:jamon-runtime:2.3.1 - http://www.jamon.org/jamon-runtime/) + + + +======================================================================== +NOTICE files +======================================================================== + +The following NOTICEs are pertain to software distributed with this project. + + +// ------------------------------------------------------------------ +// NOTICE file corresponding to the section 4d of The Apache License, +// Version 2.0, in this case for +// ------------------------------------------------------------------ + +Apache Avro +Copyright 2009-2013 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Apache Commons Codec +Copyright 2002-2009 The Apache Software Foundation + +This product includes software developed by +The Apache Software Foundation (http://www.apache.org/). 
+ +-------------------------------------------------------------------------------- +src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java contains +test data from http://aspell.sourceforge.net/test/batch0.tab. + +Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org). Verbatim copying +and distribution of this entire article is permitted in any medium, +provided this notice is preserved. +-------------------------------------------------------------------------------- + +Apache HttpComponents HttpClient +Copyright 1999-2011 The Apache Software Foundation + +This project contains annotations derived from JCIP-ANNOTATIONS +Copyright (c) 2005 Brian Goetz and Tim Peierls. See http://www.jcip.net + +Apache HttpComponents HttpCore +Copyright 2005-2011 The Apache Software Foundation + +Curator Recipes +Copyright 2011-2014 The Apache Software Foundation + +Curator Framework +Copyright 2011-2014 The Apache Software Foundation + +Curator Client +Copyright 2011-2014 The Apache Software Foundation + +Apache Geronimo +Copyright 2003-2008 The Apache Software Foundation + +Activation 1.1 +Copyright 2003-2007 The Apache Software Foundation + +Apache Commons Lang +Copyright 2001-2014 The Apache Software Foundation + +This product includes software from the Spring Framework, +under the Apache License 2.0 (see: StringUtils.containsWhitespace()) + +Apache log4j +Copyright 2007 The Apache Software Foundation + +# Compress LZF + +This library contains efficient implementation of LZF compression format, +as well as additional helper classes that build on JDK-provided gzip (deflat) +codec. + +## Licensing + +Library is licensed under Apache License 2.0, as per accompanying LICENSE file. + +## Credit + +Library has been written by Tatu Saloranta (tatu.saloranta@iki.fi). +It was started at Ning, inc., as an official Open Source process used by +platform backend, but after initial versions has been developed outside of +Ning by supporting community. + +Other contributors include: + +* Jon Hartlaub (first versions of streaming reader/writer; unit tests) +* Cedrik Lime: parallel LZF implementation + +Various community members have contributed bug reports, and suggested minor +fixes; these can be found from file "VERSION.txt" in SCM. + +Objenesis +Copyright 2006-2009 Joe Walnes, Henri Tremblay, Leonardo Mesquita + +Apache Commons Net +Copyright 2001-2010 The Apache Software Foundation + + The Netty Project + ================= + +Please visit the Netty web site for more information: + + * http://netty.io/ + +Copyright 2011 The Netty Project + +The Netty Project licenses this file to you under the Apache License, +version 2.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at: + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +License for the specific language governing permissions and limitations +under the License. + +Also, please refer to each LICENSE..txt file, which is located in +the 'license' directory of the distribution file, for the license terms of the +components that this product depends on. + +------------------------------------------------------------------------------- +This product contains the extensions to Java Collections Framework which has +been derived from the works by JSR-166 EG, Doug Lea, and Jason T. 
Greene: + + * LICENSE: + * license/LICENSE.jsr166y.txt (Public Domain) + * HOMEPAGE: + * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/ + * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/ + +This product contains a modified version of Robert Harder's Public Domain +Base64 Encoder and Decoder, which can be obtained at: + + * LICENSE: + * license/LICENSE.base64.txt (Public Domain) + * HOMEPAGE: + * http://iharder.sourceforge.net/current/java/base64/ + +This product contains a modified version of 'JZlib', a re-implementation of +zlib in pure Java, which can be obtained at: + + * LICENSE: + * license/LICENSE.jzlib.txt (BSD Style License) + * HOMEPAGE: + * http://www.jcraft.com/jzlib/ + +This product optionally depends on 'Protocol Buffers', Google's data +interchange format, which can be obtained at: + + * LICENSE: + * license/LICENSE.protobuf.txt (New BSD License) + * HOMEPAGE: + * http://code.google.com/p/protobuf/ + +This product optionally depends on 'SLF4J', a simple logging facade for Java, +which can be obtained at: + + * LICENSE: + * license/LICENSE.slf4j.txt (MIT License) + * HOMEPAGE: + * http://www.slf4j.org/ + +This product optionally depends on 'Apache Commons Logging', a logging +framework, which can be obtained at: + + * LICENSE: + * license/LICENSE.commons-logging.txt (Apache License 2.0) + * HOMEPAGE: + * http://commons.apache.org/logging/ + +This product optionally depends on 'Apache Log4J', a logging framework, +which can be obtained at: + + * LICENSE: + * license/LICENSE.log4j.txt (Apache License 2.0) + * HOMEPAGE: + * http://logging.apache.org/log4j/ + +This product optionally depends on 'JBoss Logging', a logging framework, +which can be obtained at: + + * LICENSE: + * license/LICENSE.jboss-logging.txt (GNU LGPL 2.1) + * HOMEPAGE: + * http://anonsvn.jboss.org/repos/common/common-logging-spi/ + +This product optionally depends on 'Apache Felix', an open source OSGi +framework implementation, which can be obtained at: + + * LICENSE: + * license/LICENSE.felix.txt (Apache License 2.0) + * HOMEPAGE: + * http://felix.apache.org/ + +This product optionally depends on 'Webbit', a Java event based +WebSocket and HTTP server: + + * LICENSE: + * license/LICENSE.webbit.txt (BSD License) + * HOMEPAGE: + * https://github.com/joewalnes/webbit + +# Jackson JSON processor + +Jackson is a high-performance, Free/Open Source JSON processing library. +It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has +been in development since 2007. +It is currently developed by a community of developers, as well as supported +commercially by FasterXML.com. + +Jackson core and extension components may be licensed under different licenses. +To find the details that apply to this artifact see the accompanying LICENSE file. +For more information, including possible other licensing options, contact +FasterXML.com (http://fasterxml.com). + +## Credits + +A list of contributors may be found from CREDITS file, which is included +in some artifacts (usually source distributions); but is always available +from the source code management (SCM) system project uses. + +Jackson core and extension components may licensed under different licenses. +To find the details that apply to this artifact see the accompanying LICENSE file. +For more information, including possible other licensing options, contact +FasterXML.com (http://fasterxml.com). + +mesos +Copyright 2014 The Apache Software Foundation + +Apache Thrift +Copyright 2006-2010 The Apache Software Foundation. 
+ + Apache Ant + Copyright 1999-2013 The Apache Software Foundation + + The task is based on code Copyright (c) 2002, Landmark + Graphics Corp that has been kindly donated to the Apache Software + Foundation. + +Apache Commons IO +Copyright 2002-2012 The Apache Software Foundation + +Apache Commons Math +Copyright 2001-2013 The Apache Software Foundation + +=============================================================================== + +The inverse error function implementation in the Erf class is based on CUDA +code developed by Mike Giles, Oxford-Man Institute of Quantitative Finance, +and published in GPU Computing Gems, volume 2, 2010. +=============================================================================== + +The BracketFinder (package org.apache.commons.math3.optimization.univariate) +and PowellOptimizer (package org.apache.commons.math3.optimization.general) +classes are based on the Python code in module "optimize.py" (version 0.5) +developed by Travis E. Oliphant for the SciPy library (http://www.scipy.org/) +Copyright © 2003-2009 SciPy Developers. +=============================================================================== + +The LinearConstraint, LinearObjectiveFunction, LinearOptimizer, +RelationShip, SimplexSolver and SimplexTableau classes in package +org.apache.commons.math3.optimization.linear include software developed by +Benjamin McCann (http://www.benmccann.com) and distributed with +the following copyright: Copyright 2009 Google Inc. +=============================================================================== + +This product includes software developed by the +University of Chicago, as Operator of Argonne National +Laboratory. +The LevenbergMarquardtOptimizer class in package +org.apache.commons.math3.optimization.general includes software +translated from the lmder, lmpar and qrsolv Fortran routines +from the Minpack package +Minpack Copyright Notice (1999) University of Chicago. All rights reserved +=============================================================================== + +The GraggBulirschStoerIntegrator class in package +org.apache.commons.math3.ode.nonstiff includes software translated +from the odex Fortran routine developed by E. Hairer and G. Wanner. +Original source copyright: +Copyright (c) 2004, Ernst Hairer +=============================================================================== + +The EigenDecompositionImpl class in package +org.apache.commons.math3.linear includes software translated +from some LAPACK Fortran routines. Original source copyright: +Copyright (c) 1992-2008 The University of Tennessee. All rights reserved. +=============================================================================== + +The MersenneTwister class in package org.apache.commons.math3.random +includes software translated from the 2002-01-26 version of +the Mersenne-Twister generator written in C by Makoto Matsumoto and Takuji +Nishimura. Original source copyright: +Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, +All rights reserved +=============================================================================== + +The LocalizedFormatsTest class in the unit tests is an adapted version of +the OrekitMessagesTest class from the orekit library distributed under the +terms of the Apache 2 licence. 
Original source copyright: +Copyright 2010 CS Systèmes d'Information +=============================================================================== + +The HermiteInterpolator class and its corresponding test have been imported from +the orekit library distributed under the terms of the Apache 2 licence. Original +source copyright: +Copyright 2010-2012 CS Systèmes d'Information +=============================================================================== + +The creation of the package "o.a.c.m.analysis.integration.gauss" was inspired +by an original code donated by Sébastien Brisard. +=============================================================================== + +The complete text of licenses and disclaimers associated with the the original +sources enumerated above at the time of code translation are in the LICENSE.txt +file. + +This product currently only contains code developed by authors +of specific components, as identified by the source code files; +if such notes are missing files have been created by +Tatu Saloranta. + +For additional credits (generally to people who reported problems) +see CREDITS file. + +Apache Commons Lang +Copyright 2001-2011 The Apache Software Foundation + +Apache Commons Compress +Copyright 2002-2012 The Apache Software Foundation + +Apache Commons CLI +Copyright 2001-2009 The Apache Software Foundation + +Google Guice - Extensions - Servlet +Copyright 2006-2011 Google, Inc. + +Google Guice - Core Library +Copyright 2006-2011 Google, Inc. + +Apache Jakarta HttpClient +Copyright 1999-2007 The Apache Software Foundation + +Apache Hive +Copyright 2008-2013 The Apache Software Foundation + +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). + +This product includes software developed by The JDBM Project +(http://jdbm.sourceforge.net/). + +This product includes/uses ANTLR (http://www.antlr.org/), +Copyright (c) 2003-2011, Terrence Parr. + +This product includes/uses StringTemplate (http://www.stringtemplate.org/), +Copyright (c) 2011, Terrence Parr. + +This product includes/uses ASM (http://asm.ow2.org/), +Copyright (c) 2000-2007 INRIA, France Telecom. + +This product includes/uses org.json (http://www.json.org/java/index.html), +Copyright (c) 2002 JSON.org + +This product includes/uses JLine (http://jline.sourceforge.net/), +Copyright (c) 2002-2006, Marc Prud'hommeaux . + +This product includes/uses SQLLine (http://sqlline.sourceforge.net), +Copyright (c) 2002, 2003, 2004, 2005 Marc Prud'hommeaux . + +This product includes/uses SLF4J (http://www.slf4j.org/), +Copyright (c) 2004-2010 QOS.ch + +This product includes/uses Bootstrap (http://twitter.github.com/bootstrap/), +Copyright (c) 2012 Twitter, Inc. + +This product includes/uses Glyphicons (http://glyphicons.com/), +Copyright (c) 2010 - 2012 Jan Kovarík + +This product includes DataNucleus (http://www.datanucleus.org/) +Copyright 2008-2008 DataNucleus + +This product includes Guava (http://code.google.com/p/guava-libraries/) +Copyright (C) 2006 Google Inc. + +This product includes JavaEWAH (http://code.google.com/p/javaewah/) +Copyright (C) 2011 Google Inc. + +Apache Commons Pool +Copyright 1999-2009 The Apache Software Foundation + +========================================================================= +== NOTICE file corresponding to section 4(d) of the Apache License, == +== Version 2.0, in this case for the DataNucleus distribution. 
== +========================================================================= + +=================================================================== +This product includes software developed by many individuals, +including the following: +=================================================================== +Erik Bengtson +Andy Jefferson + +=================================================================== +This product has included contributions from some individuals, +including the following: +=================================================================== + +=================================================================== +This product has included contributions from some individuals, +including the following: +=================================================================== +Joerg von Frantzius +Thomas Marti +Barry Haddow +Marco Schulze +Ralph Ullrich +David Ezzio +Brendan de Beer +David Eaves +Martin Taal +Tony Lai +Roland Szabo +Marcus Mennemeier +Xuan Baldauf +Eric Sultan + +=================================================================== +This product also includes software developed by the TJDO project +(http://tjdo.sourceforge.net/). +=================================================================== + +=================================================================== +This product includes software developed by many individuals, +including the following: +=================================================================== +Andy Jefferson +Erik Bengtson +Joerg von Frantzius +Marco Schulze + +=================================================================== +This product has included contributions from some individuals, +including the following: +=================================================================== +Barry Haddow +Ralph Ullrich +David Ezzio +Brendan de Beer +David Eaves +Martin Taal +Tony Lai +Roland Szabo +Anton Troshin (Timesten) + +=================================================================== +This product also includes software developed by the Apache Commons project +(http://commons.apache.org/). +=================================================================== + +Apache Java Data Objects (JDO) +Copyright 2005-2006 The Apache Software Foundation + +========================================================================= +== NOTICE file corresponding to section 4(d) of the Apache License, == +== Version 2.0, in this case for the Apache Derby distribution. == +========================================================================= + +Apache Derby +Copyright 2004-2008 The Apache Software Foundation + +Portions of Derby were originally developed by +International Business Machines Corporation and are +licensed to the Apache Software Foundation under the +"Software Grant and Corporate Contribution License Agreement", +informally known as the "Derby CLA". +The following copyright notice(s) were affixed to portions of the code +with which this file is now or was at one time distributed +and are placed here unaltered. + +(C) Copyright 1997,2004 International Business Machines Corporation. All rights reserved. + +(C) Copyright IBM Corp. 2003. 
+ +The portion of the functionTests under 'nist' was originally +developed by the National Institute of Standards and Technology (NIST), +an agency of the United States Department of Commerce, and adapted by +International Business Machines Corporation in accordance with the NIST +Software Acknowledgment and Redistribution document at +http://www.itl.nist.gov/div897/ctg/sql_form.htm + +Apache Commons Collections +Copyright 2001-2008 The Apache Software Foundation + +Apache Commons Configuration +Copyright 2001-2008 The Apache Software Foundation + +Apache Jakarta Commons Digester +Copyright 2001-2006 The Apache Software Foundation + +Apache Commons BeanUtils +Copyright 2000-2008 The Apache Software Foundation + +Apache Avro Mapred API +Copyright 2009-2013 The Apache Software Foundation + +Apache Avro IPC +Copyright 2009-2013 The Apache Software Foundation diff --git a/processor/assembly.sbt b/processor/assembly.sbt index 5e3272c..6f8f078 100644 --- a/processor/assembly.sbt +++ b/processor/assembly.sbt @@ -1,16 +1,11 @@ -import AssemblyKeys._ - -assemblySettings - test in assembly := {} jarName in assembly := "jubaql-processor-assembly-" + version.value + ".jar" -/// We MUST include Scala libraries, otherwise scalalogging won't -/// be included: -// assemblyOption in assembly ~= { -// _.copy(includeScala = false) -// } +// Scala libraries will be provided by the runtime. +assemblyOption in assembly ~= { + _.copy(includeScala = false) +} mergeStrategy in assembly <<= (mergeStrategy in assembly) { (old) => { @@ -36,6 +31,9 @@ mergeStrategy in assembly <<= (mergeStrategy in assembly) { // commons-beanutils-core-1.8.0.jar:org/apache/commons/beanutils/BasicDynaBean.class // and others case PathList("org", "apache", xs @ _*) => MergeStrategy.last + // scala-logging-slf4j_2.10-2.1.2.jar:com/typesafe/scalalogging/slf4j/Logger$.class vs. + // scalalogging-slf4j_2.10-1.1.0.jar:com/typesafe/scalalogging/slf4j/Logger$.class + case PathList("com", "typesafe", "scalalogging", xs @ _*) => MergeStrategy.last // javax.transaction-1.1.1.v201105210645.jar:plugin.properties vs. // javax.servlet-3.0.0.v201112011016.jar:plugin.properties vs. // javax.mail.glassfish-1.4.1.v201005082020.jar:plugin.properties vs. @@ -50,12 +48,6 @@ mergeStrategy in assembly <<= (mergeStrategy in assembly) { } } -// take only the Spark and Hadoop jars out (this is more or less an -// alternative to marking Spark as "provided") -excludedJars in assembly <<= (fullClasspath in assembly) map { cp => - cp filter {item => item.data.getPath.contains("/org.apache.hadoop/")} -} - // add "provided" dependencies back to classpath when using "sbt run". 
// this does not affect the "run" function in IDEA (i.e., it can't be used) run in Compile <<= Defaults.runTask(fullClasspath in Compile, mainClass in (Compile, run), runner in (Compile, run)) diff --git a/processor/build.sbt b/processor/build.sbt index 5bd2741..ce48bcc 100644 --- a/processor/build.sbt +++ b/processor/build.sbt @@ -1,9 +1,11 @@ import com.typesafe.sbt.SbtStartScript import java.io.File +import org.apache.ivy.core.module.descriptor.ExcludeRule + name := "JubaQL Processor" -version := "1.2.0" +version := "1.3.0" // use 2.10 for now (Spark has no 2.11 support yet) scalaVersion := "2.10.4" @@ -20,16 +22,19 @@ resolvers += "Cloudera Repository" at "https://repository.cloudera.com/artifacto // Add msgpack repository (sbt does not use the information provided in the Jubatus POM) resolvers += "MessagePack" at "http://msgpack.org/maven2" +// local repository +resolvers += Resolver.file("LocalRepo", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns) + libraryDependencies ++= Seq( // logging "com.typesafe.scala-logging" %% "scala-logging-slf4j" % "2.1.2", - "org.slf4j" % "slf4j-api" % "1.7.7", - "org.slf4j" % "slf4j-log4j12" % "1.7.7", + "org.slf4j" % "slf4j-api" % "1.6.4", + "org.slf4j" % "slf4j-log4j12" % "1.6.4", // Jubatus - "us.jubat" % "jubatus" % "0.6.0" + "us.jubat" % "jubatus" % "0.7.1" exclude("org.jboss.netty", "netty"), // jubatusonyarn - "us.jubat" %% "jubatus-on-yarn-client" % "1.0" + "us.jubat" %% "jubatus-on-yarn-client" % "1.1" exclude("javax.servlet", "servlet-api") exclude("org.jboss.netty", "netty"), // HTTP server @@ -39,25 +44,24 @@ libraryDependencies ++= Seq( // parsing of program arguments "com.github.scopt" %% "scopt" % "3.2.0", // Spark - "org.apache.spark" %% "spark-core" % "1.1.1" % "provided", - // the following will prevent org.spark-project.akka:akka-remote_2.10:2.2.3-shaded-protobuf - // from pulling in io.netty:netty:3.6.6.Final, but it will not prevent spark-core - // itself to pull in io.netty:netty-all:4.0.23.Final (note that the former - // includes the package "org.jboss.netty", while the latter includes "io.netty".) 
- "org.spark-project.akka" %% "akka-remote" % "2.2.3-shaded-protobuf" - exclude("io.netty", "netty"), - "org.apache.spark" %% "spark-streaming" % "1.1.1" % "provided", - "org.apache.spark" %% "spark-streaming-kafka" % "1.1.1" + "org.apache.spark" %% "spark-core" % "1.2.2" % "provided" + excludeAll(ExclusionRule(organization = "org.slf4j")), + "org.apache.spark" %% "spark-streaming" % "1.2.2" % "provided", + "org.apache.spark" %% "spark-streaming-kafka" % "1.2.2" + exclude("org.apache.spark", "spark-streaming_2.10") exclude("commons-beanutils", "commons-beanutils") exclude("commons-collections", "commons-collections") exclude("com.esotericsoftware.minlog", "minlog"), - "org.apache.spark" %% "spark-sql" % "1.1.1", + "org.apache.spark" %% "spark-sql" % "1.2.2" + exclude("org.apache.spark", "spark-core_2.10"), // registration with the gateway "net.databinder.dispatch" %% "dispatch-core" % "0.11.2", - // HDFS - "org.apache.hadoop" % "hadoop-client" % "2.5.0-cdh5.2.0" % "provided", + // math + "org.apache.commons" % "commons-math3" % "3.5", // for testing "org.scalatest" %% "scalatest" % "2.2.1" % "test", + "org.scalacheck" %% "scalacheck" % "1.12.1" % "test", + "org.subethamail" % "subethasmtp" % "3.1.7" % "test", "net.databinder" %% "unfiltered-filter" % "0.8.2" % "test", "net.databinder" %% "unfiltered-json4s" % "0.8.2" % "test", "net.databinder" %% "unfiltered-netty-server" % "0.8.2" % "test" diff --git a/processor/project/assembly.sbt b/processor/project/assembly.sbt index 54c3252..74adde3 100644 --- a/processor/project/assembly.sbt +++ b/processor/project/assembly.sbt @@ -1 +1 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.13.0") diff --git a/processor/project/build.properties b/processor/project/build.properties new file mode 100644 index 0000000..a6e117b --- /dev/null +++ b/processor/project/build.properties @@ -0,0 +1 @@ +sbt.version=0.13.8 diff --git a/processor/project/deptree.sbt b/processor/project/deptree.sbt index 3c9aed7..10dfae8 100644 --- a/processor/project/deptree.sbt +++ b/processor/project/deptree.sbt @@ -1 +1 @@ -addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4") +addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.5") diff --git a/processor/src/main/resources/log4j-spark-submit.xml b/processor/src/main/resources/log4j-spark-submit.xml new file mode 100644 index 0000000..299a35f --- /dev/null +++ b/processor/src/main/resources/log4j-spark-submit.xml @@ -0,0 +1,70 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/processor/src/main/resources/log4j.xml b/processor/src/main/resources/log4j.xml index ee27083..4aaa374 100644 --- a/processor/src/main/resources/log4j.xml +++ b/processor/src/main/resources/log4j.xml @@ -4,8 +4,8 @@ - - + + @@ -51,6 +51,16 @@ + + + + + + + + + + diff --git a/processor/src/main/scala/org/apache/spark/streaming/dstream/OrderedFileInputDStream.scala b/processor/src/main/scala/org/apache/spark/streaming/dstream/OrderedFileInputDStream.scala index 9f13814..71231d3 100644 --- a/processor/src/main/scala/org/apache/spark/streaming/dstream/OrderedFileInputDStream.scala +++ b/processor/src/main/scala/org/apache/spark/streaming/dstream/OrderedFileInputDStream.scala @@ -13,27 +13,24 @@ // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, 
Fifth Floor, Boston, MA 02110-1301 USA -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * --- - * - * Based on FileInputDStream from the Apache Spark 1.1.0 distribution. - */ - +// +// This file is based on streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala +// from Apache Spark 1.1.0 and incorporates code covered by the following terms: +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE_SPARK file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. package org.apache.spark.streaming.dstream import java.io.{ObjectInputStream, IOException} diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/AggregateFunctions.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/AggregateFunctions.scala new file mode 100644 index 0000000..836093d --- /dev/null +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/AggregateFunctions.scala @@ -0,0 +1,281 @@ +// Jubatus: Online machine learning framework for distributed environment +// Copyright (C) 2014-2015 Preferred Networks and Nippon Telegraph and Telephone Corporation. +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License version 2.1 as published by the Free Software Foundation. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +package us.jubat.jubaql_server.processor + +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.types._ + +import scala.reflect.ClassTag + +sealed trait SomeAggregateFunction[IN] { + def aggFun(rdd: RDD[(Long, (Long, IN))]): RDD[(Long, Any)] + + val inType: DataType + val outType: DataType + + implicit protected def toLongAnyRDD[T: ClassTag](rdd: RDD[(Long, T)]) = + rdd.mapValues(_.asInstanceOf[Any]) +} + +trait DoubleInputAggFun extends SomeAggregateFunction[Double] { + override val inType: DataType = DoubleType +} + +object AvgFun extends DoubleInputAggFun { + override def aggFun(rdd: RDD[(Long, (Long, Double))]) = + us.jubat.jubaql_server.processor.udf.AvgFun.apply(rdd) + + override val outType: DataType = DoubleType +} + +object StdDevFun extends DoubleInputAggFun { + override def aggFun(rdd: RDD[(Long, (Long, Double))]) = + us.jubat.jubaql_server.processor.udf.StdDevFun.apply(rdd) + + override val outType: DataType = DoubleType +} + +class QuantileFun(position: Double = 0.5d) + extends us.jubat.jubaql_server.processor.udf.QuantileFun(position) + with DoubleInputAggFun { + override def aggFun(rdd: RDD[(Long, (Long, Double))]) = + apply(rdd) + + override val outType: DataType = DoubleType +} + +object LinApproxFun extends DoubleInputAggFun { + override def aggFun(rdd: RDD[(Long, (Long, Double))]) = + us.jubat.jubaql_server.processor.udf.LinApproxFun.apply(rdd).mapValues(ab => { + Map("a" -> ab._1, "b" -> ab._2).asInstanceOf[Any] + }) + + override val outType: DataType = + StructType(StructField("a", DoubleType, nullable = true) :: + StructField("b", DoubleType, nullable = true) :: Nil) +} + +object FourierCoeffsFun extends DoubleInputAggFun { + override def aggFun(rdd: RDD[(Long, (Long, Double))]) = + us.jubat.jubaql_server.processor.udf.FourierCoeffsFun.apply(rdd).mapValues(reIm => { + Map("re" -> reIm._1, "im" -> reIm._2).asInstanceOf[Any] + }) + + override val outType: DataType = + StructType(StructField("re", ArrayType(DoubleType, containsNull = false), nullable = true) :: + StructField("im", ArrayType(DoubleType, containsNull = false), nullable = true) :: Nil) +} + +object WaveletCoeffsFun extends DoubleInputAggFun { + override def aggFun(rdd: RDD[(Long, (Long, Double))]) = + us.jubat.jubaql_server.processor.udf.WaveletCoeffsFun.apply(rdd) + + override val outType: DataType = ArrayType(DoubleType, containsNull = false) +} + +class HistogramFun(lowestUpperBound: Double = 0.1, + highestLowerBound: Double = 0.9, + numBins: Int = 10) + extends us.jubat.jubaql_server.processor.udf.HistogramFun(lowestUpperBound, highestLowerBound, numBins) + with DoubleInputAggFun { + override def aggFun(rdd: RDD[(Long, (Long, Double))]) = + apply(rdd) + + override val outType: DataType = ArrayType(DoubleType, containsNull = false) +} + +trait StringInputAggFun extends SomeAggregateFunction[String] { + override val inType: DataType = StringType +} + +class ConcatFun(separator: String = " ") + extends us.jubat.jubaql_server.processor.udf.ConcatFun(separator) + with StringInputAggFun { + override def aggFun(rdd: RDD[(Long, (Long, String))]) = + apply(rdd) + + override val outType: DataType = StringType +} + +object MaxElemFun extends StringInputAggFun { + override 
def aggFun(rdd: RDD[(Long, (Long, String))]) = + // for some reason in (only) this case, type parameters + // to apply() must be specified explicitly + us.jubat.jubaql_server.processor.udf.MaxElemFun.apply[Long, Long, String](rdd) + + override val outType: DataType = StringType +} + +object AggregateFunctions { + type AggFunOrError = Either[String, SomeAggregateFunction[_]] + + // check parameter types for aggregate functions + + def checkAvgParams(params: List[Expression]): AggFunOrError = { + SingleParamAggFunctionChecker("avg", params, AvgFun).check + } + + def checkStdDevParams(params: List[Expression]): AggFunOrError = { + SingleParamAggFunctionChecker("stddev", params, StdDevFun).check + } + + def checkQuantileParams(params: List[Expression]): AggFunOrError = { + params match { + // version with provided p parameter + case pExp :: exp :: Nil => + if (!pExp.foldable) { + Left("first parameter to quantile must be evaluable") + } else { + if (!pExp.dataType.isInstanceOf[NumericType]) { + Left(s"wrong type of parameters for quantile (must be (numeric, numeric))") + } else { + val pBox = pExp.eval() + pBox match { + case p: Double if p >= 0 && p <= 1.0 => + Right(new QuantileFun(p)) + case _ => + Left("first parameter to quantile must be in [0,1] range") + } + } + } + + // no parameter version + case others => + SingleParamAggFunctionChecker("quantile", others, new QuantileFun()).check + } + } + + def checkLinApproxParams(params: List[Expression]): AggFunOrError = { + SingleParamAggFunctionChecker("linapprox", params, LinApproxFun).check + } + + def checkFourierParams(params: List[Expression]): AggFunOrError = { + SingleParamAggFunctionChecker("fourier", params, FourierCoeffsFun).check + } + + def checkWaveletParams(params: List[Expression]): AggFunOrError = { + SingleParamAggFunctionChecker("wavelet", params, WaveletCoeffsFun).check + } + + def checkHistogramParams(params: List[Expression]): AggFunOrError = { + params match { + // version with 3 provided parameters (bounds and number of bins) + case lubExp :: hlbExp :: binExp :: exp :: Nil => + if (!lubExp.foldable || !hlbExp.foldable || !binExp.foldable) { + Left("parameters for histogram must be evaluable") + } else { + if (!lubExp.dataType.isInstanceOf[NumericType] || + !hlbExp.dataType.isInstanceOf[NumericType] || + !binExp.dataType.isInstanceOf[IntegralType]) { + Left(s"wrong type of parameters for histogram (must be (numeric, numeric, integer, numeric))") + } else { + (lubExp.eval(), hlbExp.eval(), binExp.eval()) match { + case (lub: Double, hlb: Double, bin: Int) => + Right(new HistogramFun(lub, hlb, bin)) + case _ => + Left("wrong type of parameters for histogram, must be (double, double, int, numeric)") + } + } + } + + // version with 2 provided parameters (bounds) + case lubExp :: hlbExp :: exp :: Nil => + if (!lubExp.foldable || !hlbExp.foldable) { + Left("parameters for histogram must be evaluable") + } else { + if (!lubExp.dataType.isInstanceOf[NumericType] || + !hlbExp.dataType.isInstanceOf[NumericType]) { + Left(s"wrong type of parameters for histogram (must be (numeric, numeric, numeric))") + } else { + (lubExp.eval(), hlbExp.eval()) match { + case (lub: Double, hlb: Double) => + Right(new HistogramFun(lub, hlb)) + case _ => + Left("wrong type of parameters for histogram, must be (double, double, numeric)") + } + } + } + + // version with 1 provided parameter (bins) + case binExp :: exp :: Nil => + if (!binExp.foldable) { + Left("parameters for histogram must be evaluable") + } else { + if 
(!binExp.dataType.isInstanceOf[IntegralType]) { + Left(s"wrong type of parameters for histogram (must be (integer, numeric))") + } else { + (binExp.eval()) match { + case bin: Int => + Right(new HistogramFun(numBins = bin)) + case _ => + Left("wrong type of parameters for histogram, must be (int, numeric)") + } + } + } + + // no parameter version + case others => + SingleParamAggFunctionChecker("histogram", others, new HistogramFun()).check + } + } + + def checkConcatParams(params: List[Expression]): AggFunOrError = { + params match { + // version with provided p parameter + case cExp :: exp :: Nil => + if (!cExp.foldable) { + Left("first parameter to concat must be evaluable") + } else { + if (!cExp.dataType.equals(StringType)) { + Left(s"wrong type of parameters for concat (must be (string, string))") + } else { + val cBox = cExp.eval() + cBox match { + case c: String => + Right(new ConcatFun(c)) + case _ => + Left("first parameter to concat must be a string") + } + } + } + + // no parameter version + case others => + SingleParamAggFunctionChecker("concat", others, new ConcatFun()).check + } + } + + def checkMaxElemParams(params: List[Expression]): AggFunOrError = { + SingleParamAggFunctionChecker("maxelem", params, MaxElemFun).check + } + + private case class SingleParamAggFunctionChecker[T <: + SomeAggregateFunction[_]](name: String, + params: List[Expression], + obj: T) { + def check: Either[String, T] = { + params match { + case exp :: Nil => + Right(obj) + case _ => + Left(s"wrong number of parameters for $name (must be 1)") + } + } + } + +} diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/DatumExtractor.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/DatumExtractor.scala new file mode 100644 index 0000000..d1225ed --- /dev/null +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/DatumExtractor.scala @@ -0,0 +1,282 @@ +// Jubatus: Online machine learning framework for distributed environment +// Copyright (C) 2014-2015 Preferred Networks and Nippon Telegraph and Telephone Corporation. +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License version 2.1 as published by the Free Software Foundation. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +package us.jubat.jubaql_server.processor + +import com.typesafe.scalalogging.slf4j.Logger +import org.apache.spark.sql.{ShortType, IntegerType, LongType, FloatType, DoubleType, StringType, DataType, Row} +import org.json4s.JsonAST.JNumber +import org.json4s.native.JsonMethods +import org.json4s._ +import us.jubat.common.Datum + +import scala.collection.mutable +import scala.collection.concurrent +import scala.collection.JavaConversions._ + +object DatumExtractor { + def extract(cm: CreateModel, + data: String, + featureFunctions: concurrent.Map[String, String], + logger: Logger): Datum + = extract(cm, JsonMethods.parse(data), featureFunctions, logger) + + def extract(cm: CreateModel, + data: JValue, + featureFunctions: concurrent.Map[String, String], + logger: Logger): Datum = { + // we can only process numeric and string values + val filtered: List[(String, JValue)] = data.filterField { + case JField(_, _: JNumber) | JField(_, _: JString) => true + case _ => false + } + + // fill the row with Spark-compatible values (String, Int, Long, Double) + val row = Row(filtered.map { + case (_, value: JString) => + value.s + case (_, value: JInt) if value.num.isValidInt => + value.num.toInt + case (_, value: JInt) => + // note: this may still overflow + value.num.toLong + case (_, value: JDecimal) => + value.num.toDouble + case (_, value: JDouble) => + value.num + }: _*) + // set the correct type information for Spark + val schema: Map[String, (Int, DataType)] = filtered.zipWithIndex.map { + case ((key, _: JString), ix) => + (key, (ix, StringType)) + case ((key, value: JInt), ix) if value.num.isValidInt => + (key, (ix, IntegerType)) + case ((key, _: JInt), ix) => + (key, (ix, LongType)) + case ((key, _: JNumber), ix) => + (key, (ix, DoubleType)) + }.toMap + + extract(cm, schema, row, featureFunctions, logger) + } + + def extract(cm: CreateModel, + schema: Map[String, (Int, DataType)], + row: Row, + featureFunctions: concurrent.Map[String, String], + logger: Logger): Datum = { + // schema is a mapping (column name -> (column index, column type)) + + val datum = new Datum() + val ds = new DatumSetter(datum, row, logger) + + // schemaCopy holds current candidate columns for feature extraction. + // one matches, then deleted from schemaCopy. 
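    // For illustration (hypothetical columns): with
    //   schema = Map("label" -> (0, StringType), "name" -> (1, StringType), "age" -> (2, IntegerType))
    // and a model declared with (label: label), schemaCopy starts out holding only
    // "name" and "age"; every match below removes the consumed column from schemaCopy,
    // so later wildcard specifiers do not pick it up again.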
+ val schemaCopy = mutable.Map[String, (Int, DataType)](schema.toSeq: _*) + // remove a label column or a id column + cm.labelOrId.foreach(schemaCopy -= _._2) + + def getFeatureFunctionBodyByName(f: String): String = { + featureFunctions.get(f) match { + case None => + val knownFuncs = featureFunctions.keys.mkString(", ") + val msg = s"feature function '$f' is not found (known: ${knownFuncs})" + logger.error(msg) + throw new RuntimeException(msg) + case Some(funcBody) => + funcBody + } + } + + // register feature functions + cm.featureExtraction.foreach { + case (_, "id") | (_, "unigram") | (_, "bigram") => + // do nothing + + case (NormalParameters(params), funcName) => + val funcBody = getFeatureFunctionBodyByName(funcName) + JavaScriptFeatureFunctionManager.register(funcName, params.length, funcBody) + + case (_, funcName) => + val funcBody = getFeatureFunctionBodyByName(funcName) + JavaScriptFeatureFunctionManager.register(funcName, 1, funcBody) + } + + type SchemaType = Seq[(String, (Int, DataType))] + + def processWithoutFeatureFunction(column: (String, (Int, DataType))): Unit = column match { + case (colName, (rowIdx, dataType)) => + ds.setFromRow(colName, rowIdx, dataType) + schemaCopy -= colName + } + + def processWithJubatusFeatureFunction(funcName: String, column: (String, (Int, DataType))): Unit = column match { + case (colName, (rowIdx, dataType)) => + ds.setFromRow(s"$colName-$funcName-jubaconv", rowIdx, dataType) + schemaCopy -= colName + } + + def processWithFeatureFunction(columns: SchemaType, funcName: String): Unit = { + val args: Seq[AnyRef] = columns.map { + case (colName, (rowIdx, dataType)) => + ds.getFromRow(rowIdx, dataType) match { + case None => + // TODO: improve message + val msg = s"failed to get $colName" + logger.error(msg) + throw new RuntimeException(msg) + + case Some(arg) => + arg.asInstanceOf[AnyRef] + } + } + + val values = JavaScriptFeatureFunctionManager.callAndGetValues(funcName, args: _*) + + val catArgNames = columns.map(_._1).mkString(",") + val outputColNameCommon = s"$funcName#$catArgNames" + + values.foreach { + case (key, value) => + val outputColName = + // if we have a single-valued return function, omit the object key, + // otherwise add it to the datum's key string + if (values.size == 1) { + outputColNameCommon + } else { + outputColNameCommon + "#" + key + } + value match { + case s: String => + ds.set(outputColName, s) + case x: Double => + ds.set(outputColName, x) + } + } + columns.foreach(schemaCopy -= _._1) + } + + def processColumns(funcName: String, columns: Seq[(String, (Int, DataType))]) = { + funcName match { + case "id" => + columns.foreach(processWithoutFeatureFunction) + case "unigram" | "bigram" => + columns.foreach(processWithJubatusFeatureFunction(funcName, _)) + case _ => + columns.foreach { + case arg => + processWithFeatureFunction(Seq(arg), funcName) + } + } + } + + cm.featureExtraction.foreach { + // * + case (WildcardAnyParameter, funcName) => + processColumns(funcName, schemaCopy.toSeq) + + // prefix_* + case (WildcardWithPrefixParameter(prefix), funcName) => + val processedColumns = schemaCopy.filter(_._1.startsWith(prefix)).toSeq + processColumns(funcName, processedColumns) + + // *_suffix + case (WildcardWithSuffixParameter(suffix), funcName) => + val processedColumns = schemaCopy.filter(_._1.endsWith(suffix)).toSeq + processColumns(funcName, processedColumns) + + // not wildcard + case (NormalParameters(params), funcName) => + params match { + case Nil => + val msg = "should not pass here. 
(this may be a bug of parser)" + logger.error(msg) + throw new RuntimeException(msg) // maybe RuntimeException is inappropriate... + + case colNames => + val columns: SchemaType = params.map { + case colName => + // if we have an explicitly specified column name, + // then it does not matter whether it was used before or + // not, so we access `schema`, not `schemaCopy` + schema.get(colName) match { + case None => + val msg = s"column named '$colName' not found" + logger.error(msg) + throw new RuntimeException(msg) + case Some((rowIdx, dataType)) => + (colName, (rowIdx, dataType)) + } + } + + funcName match { + case "id" if colNames.length == 1 => + processWithoutFeatureFunction(columns.head) + case "unigram" | "bigram" if colNames.length == 1 => + processWithJubatusFeatureFunction(funcName, columns.head) + case "id" => + val msg = "attempt to call id feature function with more than one arguments" + logger.error(msg) + throw new RuntimeException(msg) + case "unigram" | "bigram" => + val msg = "attempt to call Jubatus feature function with more than one argument" + logger.error(msg) + throw new RuntimeException(msg) + case _ => + processWithFeatureFunction(columns, funcName) + } + } + } + + datum + } +} + +private class DatumSetter(d: Datum, row: Row, logger: Logger) { + def getFromRow(rowIdx: Int, dataType: DataType): Option[Any] = dataType match { + case _ if row.isNullAt(rowIdx) => + // a null value is a property of one particular data value, so + // we will just ignore this value and continue with the next + None + case StringType => + Some(row.getString(rowIdx)) + case FloatType => + Some(row.getFloat(rowIdx).toDouble) + case DoubleType => + Some(row.getDouble(rowIdx)) + case ShortType => + Some(row.getShort(rowIdx).toDouble) + case IntegerType => + Some(row.getInt(rowIdx).toDouble) + case LongType => + Some(row.getLong(rowIdx).toDouble) + case other => + logger.warn(s"cannot take value of type '$other' from row $row") + None + } + def setFromRow(colName: String, rowIdx: Int, dataType: DataType) = { + getFromRow(rowIdx, dataType) match { + case Some(s: String) => + set(colName, s) + case Some(x: Double) => + set(colName, x) + case _ => + // do nothing + } + } + + def set(colName: String, value: String) = d.addString(colName, value) + def set(colName: String, value: Double) = d.addNumber(colName, value) +} diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/HandleExceptions.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/HandleExceptions.scala index 0929820..d6fe798 100644 --- a/processor/src/main/scala/us/jubat/jubaql_server/processor/HandleExceptions.scala +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/HandleExceptions.scala @@ -30,6 +30,8 @@ class HandleExceptions // `handle` asynchronously handles exceptions. service(request) handle { case error => + logger.error(error.toString) + logger.error(error.getMessage) logger.error(error.getStackTraceString) val statusCode = HttpResponseStatus.INTERNAL_SERVER_ERROR val body = ("result" -> error.getMessage) diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/Helpers.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/Helpers.scala new file mode 100644 index 0000000..135cddb --- /dev/null +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/Helpers.scala @@ -0,0 +1,47 @@ +// Jubatus: Online machine learning framework for distributed environment +// Copyright (C) 2014-2015 Preferred Networks and Nippon Telegraph and Telephone Corporation. 
+// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License version 2.1 as published by the Free Software Foundation. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +package us.jubat.jubaql_server.processor + +import org.apache.spark.rdd.RDD +import org.joda.time.format.{DateTimeFormatterBuilder, ISODateTimeFormat} + +object Helpers { + def niceRDDString(rdd: RDD[_]): String = { + rdd.toDebugString.split('\n').map(" " + _).mkString("\n") + } + + // a date parser for 2014-11-21T15:52:21[.943321112] + protected val timestampParser = { + val fractionElem = new DateTimeFormatterBuilder() + .appendLiteral('.') + .appendFractionOfSecond(3, 9).toFormatter + new DateTimeFormatterBuilder() + .append(ISODateTimeFormat.date) + .appendLiteral('T') + .append(ISODateTimeFormat.hourMinuteSecond) + .appendOptional(fractionElem.getParser) + .toFormatter + } + + def parseTimestamp(s: String): Long = timestampParser.parseMillis(s) + + // a date formatter for 2014-11-21T15:52:21.943 + // note that this will only be used for window timestamps, so millisecond + // precision is totally ok + protected val timestampFormatter = ISODateTimeFormat.dateHourMinuteSecondMillis() + + def formatTimestamp(l: Long): String = timestampFormatter.print(l) +} diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/HybridProcessor.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/HybridProcessor.scala index a0c86db..eb2b97b 100644 --- a/processor/src/main/scala/us/jubat/jubaql_server/processor/HybridProcessor.scala +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/HybridProcessor.scala @@ -15,6 +15,8 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA package us.jubat.jubaql_server.processor +import RunMode.Development + import scala.concurrent.future import scala.concurrent.ExecutionContext.Implicits.global import org.apache.spark.SparkContext @@ -40,10 +42,21 @@ import org.json4s.native.JsonMethods._ // "struct" holding the number of processed items, runtime in ms and largest seen id case class ProcessingInformation(itemCount: Long, runtime: Long, maxId: Option[String]) +// an object describing the state of the processor +sealed trait ProcessorState + +case object Initialized extends ProcessorState + +case object Running extends ProcessorState + +case object Finished extends ProcessorState + class HybridProcessor(sc: SparkContext, sqlc: SQLContext, storageLocation: String, - streamLocations: List[String]) + streamLocations: List[String], + runMode: RunMode = RunMode.Development, + checkpointDir: String = "file:///tmp/spark") extends LazyLogging { /* * We want to do processing of static data first, then continue with @@ -92,12 +105,18 @@ class HybridProcessor(sc: SparkContext, else logger.warn("could not extract number of cores from run command: " + _runCmd) - // define the formats that we can use + /// define the STORAGE sources that we can use + // a file in the local file system (must be accessible by all executors) val fileRe = 
"""file://(.+)""".r + // a file in HDFS val hdfsRe = """(hdfs://.+)""".r + // an empty data set + val emptyRe = """^empty(.?)""".r + /// define the STREAM sources that we can use + // a Kafka message broker (host:port/topic/groupid) val kafkaRe = """kafka://([^/]+)/([^/]+)/([^/]+)$""".r + // endless dummy JSON data val dummyRe = """^dummy(.?)""".r - val emptyRe = """^empty(.?)""".r val validStaticLocations: List[Regex] = emptyRe :: fileRe :: hdfsRe :: Nil val validStreamLocations: List[Regex] = dummyRe :: kafkaRe :: Nil @@ -127,13 +146,29 @@ class HybridProcessor(sc: SparkContext, // Flag that stores whether user stopped data processing manually protected var userStoppedProcessing = false + // state of the processor + protected var _state: ProcessorState = Initialized + + protected def setState(newState: ProcessorState) = synchronized { + _state = newState + } + + def state: ProcessorState = synchronized { + _state + } + /** - * Start hybrid processing using the given transformation. + * Start hybrid processing using the given RDD[JValue] operation. * - * @param transform an RDD operation that will be performed on each batch + * The stream data will be parsed into a JValue (if possible) and the + * transformation is expected to act on the resulting RDD[JValue]. + * Note that *as opposed to* the `start(SchemaRDD => SchemaRDD)` version, + * if the input RDD is empty, the function will still be executed. + * + * @param process an RDD operation that will be performed on each batch * @return one function to stop processing and one to get the highest IDs seen so far */ - def start(transform: RDD[JValue] => RDD[_]): (() => (ProcessingInformation, ProcessingInformation), + def startJValueProcessing(process: RDD[JValue] => Unit): (() => (ProcessingInformation, ProcessingInformation), () => Option[IdType]) = { val parseJsonStringIntoOption: (String => Traversable[JValue]) = line => { val maybeJson = parseOpt(line) @@ -143,54 +178,132 @@ class HybridProcessor(sc: SparkContext, } maybeJson } - val parseAndTransform: RDD[String] => RDD[Unit] = rdd => { - transform(rdd.flatMap(parseJsonStringIntoOption)).map(_ => ()) - } - _start(parseAndTransform) + // parse DStream[String] into DStream[JValue] item by item, + // skipping unparseable strings + val parseJsonDStream = (stream: DStream[String]) => + stream.flatMap(parseJsonStringIntoOption) + val processJsonDStream: DStream[JValue] => Unit = + _.foreachRDD(process) + // start processing + _start(parseJsonDStream, processJsonDStream) } /** - * Start hybrid processing using the given transformation. + * Start hybrid processing using the given SchemaRDD operation. * - * @param transform an RDD operation that will be performed on each batch + * The stream data will be equipped with a schema (either as passed + * as a parameter or as inferred by `SQLContext.jsonRDD()`) and the + * operation is expected to act on the resulting SchemaRDD. + * Note that if the RDD is empty, the given function will not be + * executed at all (not even with an empty RDD as a parameter). 
+ * + * @param process an RDD operation that will be performed on each batch * @return one function to stop processing and one to get the highest IDs seen so far */ - def start(transform: SchemaRDD => SchemaRDD, + def startTableProcessing(process: SchemaRDD => Unit, schema: Option[StructType]): (() => (ProcessingInformation, ProcessingInformation), () => Option[IdType]) = { - val parseAndTransform: RDD[String] => RDD[Unit] = rdd => { - // with an empty RDD, we cannot infer the schema (it will raise an exception) - if (rdd.count() > 0) { - // parse with schema or infer if not given - val jsonRdd = schema.map(sqlc.jsonRDD(rdd, _)).getOrElse(sqlc.jsonRDD(rdd, 0.1)) - transform(jsonRdd).map(_ => ()) - } else { - // create an (empty) SchemaRDD - rdd.map(_ => ()) - } + // parse DStream[String] into a row/column shaped stream + val parseJson: DStream[String] => SchemaDStream = schema match { + case Some(givenSchema) => + SchemaDStream.fromStringStreamWithSchema(sqlc, _, givenSchema, None) + case None => + SchemaDStream.fromStringStream(sqlc, _, None) + } + // We must only execute the process function if the RDD is non-empty. + // For inferred schema method, if the RDD is empty then the schema + // will be empty, too. For given schema method, we have to check + // the actual count (which is more expensive). + val processIfNotEmpty: SchemaRDD => Unit = schema match { + case Some(givenSchema) => + rdd => if (rdd.count() > 0) process(rdd) + case None => + rdd => if (rdd.schema.fields.size > 0) process(rdd) } - _start(parseAndTransform) + val processStream: SchemaDStream => Unit = + _.foreachRDD(processIfNotEmpty) + _start[SchemaDStream](parseJson, processStream) } /** - * Start hybrid processing using the given transformation. + * Start hybrid processing using the given SchemaRDD operation. * - * @param parseAndTransform an RDD operation that will be performed on each batch + * The stream data will be equipped with a schema (either as passed + * as a parameter or as inferred by `SQLContext.jsonRDD()`) and the + * operation is expected to act on the resulting SchemaDStream. + * The function is responsible for triggering output operations. + * + * @param process a function to transform and operate on the main DStream + * @return one function to stop processing and one to get the highest IDs seen so far + */ + def startTableProcessingGeneral(process: SchemaDStream => Unit, + schema: Option[StructType], + inputStreamName: String): (() => (ProcessingInformation, + ProcessingInformation), () => Option[IdType]) = { + // parse DStream[String] into a row/column shaped stream + val parseJson: DStream[String] => SchemaDStream = schema match { + case Some(givenSchema) => + SchemaDStream.fromStringStreamWithSchema(sqlc, _, givenSchema, Some(inputStreamName)) + case None => + SchemaDStream.fromStringStream(sqlc, _, Some(inputStreamName)) + } + _start[SchemaDStream](parseJson, process) + } + + /** + * Start hybrid processing using the given operation. + * + * The function passed in must operate on an RDD[String] (the stream data + * to be processed in a single batch), where each item of the RDD can be + * assumed to be JSON-encoded. The function *itself* is responsible to + * start computation (e.g. by using `rdd.foreach()` or `rdd.count()`). + * As that function can do arbitrary (nested and chained) processing, the + * notion of "number of processed items" makes only limited sense; we + * work with the "number of input items" instead. 
+ * + * @param parseJson a function to get the input stream into something processable, + * like `DStream[String] => DStream[JValue]` or + * `DStream[String] => SchemaDStream`. "processable" means + * that there is a `foreachRDD()` method matching the + * parameter type of the `process()` function. + * (This is applied duck typing!) + * @param process the actual operations on the parsed data stream. Note that + * this function is responsible for calling an output operation. + * @tparam T the type of RDD that the parsed stream will allow processing on, + * like `RDD[JValue]` or `SchemaRDD` * @return one function to stop processing and one to get the highest IDs seen so far */ - protected def _start(parseAndTransform: RDD[String] => RDD[Unit]): + protected def _start[T](parseJson: DStream[String] => T, + process: T => Unit): (() => (ProcessingInformation, ProcessingInformation), () => Option[IdType]) = { + if (state != Initialized) { + val msg = "processor cannot be started in state " + state + logger.error(msg) + throw new RuntimeException(msg) + } + setState(Running) logger.debug("creating StreamingContext for static data") + /* In order for updateStreamByKey() to work, we need to enable RDD checkpointing + * by setting a checkpoint directory. Note that this is different from enabling + * Streaming checkpointing (which would be needed for driver fault-tolerance), + * which would require the whole state of the application (in particular, all + * functions in stream.foreachRDD(...) calls) to be serializable. This would + * mean a rewrite of large parts of code, if it is possible at all. + * Also see . + */ + sc.setCheckpointDir(checkpointDir) ssc_ = new StreamingContext(sc, Seconds(2)) // this has to match our jubaql_timestamp inserted by fluentd - val extractRe = """.+"jubaql_timestamp": ?"([0-9\-:.T]+)".*""".r + val timestampInJsonRe = """ *"jubaql_timestamp": ?"([0-9\-:.T]+)" *""".r + // Extract a jubaql_timestamp field from a JSON-shaped string and return it. 
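    // For illustration (made-up record): given the string
    //   """{"age": 26, "jubaql_timestamp": "2014-11-21T15:52:21.943"}"""
    // extractId returns "2014-11-21T15:52:21.943"; an item without a matching
    // jubaql_timestamp field maps to the empty string.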
val extractId: String => IdType = item => { - item match { - case extractRe(idString) => - idString - case _ => + timestampInJsonRe.findFirstMatchIn(item) match { + case Some(aMatch) => + val id = aMatch.group(1) + id + case None => "" } } @@ -257,13 +370,15 @@ class HybridProcessor(sc: SparkContext, logger.debug("not repartitioning") staticData } + // first find the maximal ID in the data and count it repartitionedData.map(item => { + val id = extractId(item) // update maximal ID - maxStaticId += Some(extractId(item)) - item - }).transform(parseAndTransform).foreachRDD(rdd => { + maxStaticId += Some(id) + id + }).foreachRDD(rdd => { val count = rdd.count() - // we count the number of total processed rows (on the driver) + // we count the number of total input rows (on the driver) countStatic += count // stop processing of static data if there are no new files if (count == 0) { @@ -275,6 +390,9 @@ class HybridProcessor(sc: SparkContext, logger.info(s"processed $count (static) lines") } }) + // now do the actual processing + val mainStream = parseJson(repartitionedData) + process(mainStream) // start first StreamingContext logger.info("starting static data processing") @@ -315,7 +433,7 @@ class HybridProcessor(sc: SparkContext, } else { logger.warn("static data processing ended, but did not complete") } - staticStreamingContext.stop(false, true) + staticStreamingContext.stop(stopSparkContext = false, stopGracefully = true) logger.debug("bye from thread to wait for completion of static processing") } onFailure { case error: Throwable => @@ -326,7 +444,6 @@ class HybridProcessor(sc: SparkContext, future { // NB. This is a separate thread. In functions that will be serialized, // you cannot necessarily use variables from outside this thread. - // Also see . val localExtractId = extractId val localCountStream = countStream val localMaxStreamId = maxStreamId @@ -340,6 +457,16 @@ class HybridProcessor(sc: SparkContext, staticRunTime = System.currentTimeMillis() - staticStartTime logger.debug("static processing ended after %d items and %s ms, largest seen ID: %s".format( countStatic.value, staticRunTime, largestStaticItemId)) + logger.debug("sleeping a bit to allow Spark to settle") + runMode match { + case Development => + Thread.sleep(200) + case _ => + // If we don't sleep long enough here, then old/checkpointed RDDs + // won't be cleaned up in time before the next process starts. For + // some reason, this happens only with YARN. + Thread.sleep(8000) + } if (staticProcessingComplete && !userStoppedProcessing) { logger.info("static processing completed successfully, setting up stream") streamLocations match { @@ -347,11 +474,15 @@ class HybridProcessor(sc: SparkContext, // set up stream processing logger.debug("creating StreamingContext for stream data") ssc_ = new StreamingContext(sc, Seconds(2)) - val allStreamData: DStream[String] = streamLocation match { + val allStreamData: DStream[(IdType, String)] = (streamLocation match { case dummyRe(nothing) => - // dummy JSON data emitted over and over - val dummyData = sc.parallelize("{\"id\": 5}" :: "{\"id\": 6}" :: - "{\"id\": 7}" :: Nil) + // dummy JSON data emitted over and over (NB. 
the timestamp + // is not increasing over time) + val dummyData = sc.parallelize( + """{"gender":"m","age":26,"jubaql_timestamp":"2014-11-21T15:52:21.943321112"}""" :: + """{"gender":"f","age":24,"jubaql_timestamp":"2014-11-21T15:52:22"}""" :: + """{"gender":"m","age":31,"jubaql_timestamp":"2014-11-21T15:53:21.12345"}""" :: + Nil) new ConstantInputDStream(ssc_, dummyData) case kafkaRe(zookeeper, topics, groupId) => // connect to the given Kafka instance and receive data @@ -366,38 +497,41 @@ class HybridProcessor(sc: SparkContext, // left for broadcast variables, so we cannot communicate // our "runState = false" information. StorageLevel.DISK_ONLY).map(_._2) - } + }).map(item => (localExtractId(item), item)) val streamData = (largestStaticItemId match { case Some(largestId) => // only process items with a strictly larger id than what we // have seen so far logger.info("filtering for items with an id larger than " + largestId) - allStreamData.filter(item => { - localExtractId(item) > largestId + allStreamData.filter(itemWithId => { + itemWithId._1 > largestId }) case None => // don't do any ID filtering if there is no "largest id" logger.info("did not see any items in static processing, " + "processing whole stream") allStreamData - }).map(item => { + }).map(itemWithId => { // remember the largest seen ID - localMaxStreamId += Some(localExtractId(item)) - item + localMaxStreamId += Some(itemWithId._1) + itemWithId._2 }) logger.debug("stream data DStream: " + streamData) - streamData.transform(parseAndTransform).foreachRDD(rdd => { - // this `count` is *necessary* to trigger the (lazy) transformation! + streamData.foreachRDD(rdd => { val count = rdd.count() // we count the number of total processed rows (on the driver) localCountStream += count logger.info(s"processed $count (stream) lines") }) + // now do the actual processing + val mainStream = parseJson(streamData) + process(mainStream) // start stream processing synchronized { if (userStoppedProcessing) { logger.info("processing was stopped by user during stream setup, " + "not starting") + setState(Finished) } else { logger.info("starting stream processing") streamStartTime = System.currentTimeMillis() @@ -407,21 +541,26 @@ class HybridProcessor(sc: SparkContext, case Nil => logger.info("not starting stream processing " + "(no stream source given)") + setState(Finished) case _ => logger.error("not starting stream processing " + "(multiple streams not implemented)") + setState(Finished) } } else if (staticProcessingComplete && userStoppedProcessing) { logger.info("static processing was stopped by user, " + "not setting up stream") + setState(Finished) } else { logger.warn("static processing did not complete successfully, " + "not setting up stream") + setState(Finished) } logger.debug("bye from thread to start stream processing") } onFailure { case error: Throwable => logger.error("Error while setting up stream processing", error) + setState(Finished) } // return a function to stop the data processing @@ -431,7 +570,7 @@ class HybridProcessor(sc: SparkContext, userStoppedProcessing = true } logger.debug("now stopping the StreamingContext") - currentStreamingContext.stop(false, true) + currentStreamingContext.stop(stopSparkContext = false, stopGracefully = true) logger.debug("done stopping the StreamingContext") // if stream processing was not started or there was a runtime already // computed, we don't update the runtime @@ -441,13 +580,31 @@ class HybridProcessor(sc: SparkContext, logger.info(("processed %s items in %s ms (static) and 
%s items in " + "%s ms (stream)").format(countStatic.value, staticRunTime, countStream.value, streamRunTime)) + setState(Finished) (ProcessingInformation(countStatic.value, staticRunTime, maxStaticId.value), ProcessingInformation(countStream.value, streamRunTime, maxStreamId.value)) }, () => maxStaticId.value) } + /** + * Allows the user to wait for termination of the processing. + * If an exception happens during processing, an exception will be thrown here. + */ def awaitTermination() = { - ssc_.awaitTermination() + logger.debug("user is waiting for termination ...") + try { + ssc_.awaitTermination() + setState(Finished) + } catch { + case e: Throwable => + logger.warn("StreamingContext threw an exception (\"%s\"), shutting down".format( + e.getMessage)) + // when we got an exception, clean up properly + ssc_.stop(stopSparkContext = false, stopGracefully = true) + setState(Finished) + logger.info(s"streaming context was stopped after exception") + throw e + } } } diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/JavaScriptHelpers.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/JavaScriptHelpers.scala new file mode 100644 index 0000000..dbc4155 --- /dev/null +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/JavaScriptHelpers.scala @@ -0,0 +1,150 @@ +// Jubatus: Online machine learning framework for distributed environment +// Copyright (C) 2014-2015 Preferred Networks and Nippon Telegraph and Telephone Corporation. +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License version 2.1 as published by the Free Software Foundation. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +package us.jubat.jubaql_server.processor + +import javax.mail.internet.{InternetAddress, MimeMessage} +import javax.mail.{Message, Session, Transport} + +import dispatch.Defaults._ +import dispatch._ + +import scala.collection.JavaConversions._ +import scala.util.{Failure, Success, Try} + +object JavaScriptHelpers { + def test(): String = { + "test" + } + + val h = Http() + + /** + * Make a simple HTTP GET request. + * + * @param where URL to query + * @return response body + */ + def httpGet(where: String): Try[String] = { + val r = url(where) + makeRequest(r) + } + + /** + * Make a simple HTTP GET request with URL parameters. + * + * @param where URL to query + * @param params a key-value JavaScript object (will be sent as URL + * parameters) + * @return response body + */ + def httpGet(where: String, params: java.util.Map[_, _]): Try[String] = { + val urlParams = stringifyMap(params) + val r = url(where) < + Success("Mail sent") + case Failure(err) => + println(err) + Failure(err) + } + } + + /** + * Perform a blocking request and return the result as a Try-wrapped string. 
+ */ + protected def makeRequest(req: Req): Try[String] = { + h(req OK as.String).either.map(_ match { + case Left(err) => Failure(err) + case Right(s) => Success(s) + }).apply() + } + + protected def stringifyMap(obj: java.util.Map[_, _]): Map[String, String] = { + (obj.toList.map { case (key, value) => + (key.toString, value match { + case s: String => + s + case d: java.lang.Double if d.toString.endsWith(".0") => + d.toInt.toString + case other => other.toString + }) + }).toMap + } + + def javaScriptToScala(s: String) = s + def javaScriptToScala(x: Double) = x +} diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/JavaScriptUDFManager.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/JavaScriptUDFManager.scala new file mode 100644 index 0000000..1b2af8b --- /dev/null +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/JavaScriptUDFManager.scala @@ -0,0 +1,131 @@ +// Jubatus: Online machine learning framework for distributed environment +// Copyright (C) 2014-2015 Preferred Networks and Nippon Telegraph and Telephone Corporation. +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License version 2.1 as published by the Free Software Foundation. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +package us.jubat.jubaql_server.processor + +import scala.collection.mutable +import scala.collection.JavaConversions +import javax.script.{ScriptEngine, ScriptEngineManager, Invocable} + +import scala.util.{Failure, Success, Try} + +class JavaScriptUDFManager { + // The null is required. + // See: http://stackoverflow.com/questions/20168226/sbt-0-13-scriptengine-is-null-for-getenginebyname-javascript + private val scriptEngineManager = new ScriptEngineManager(null) + + private val jsEngines = new ThreadLocal[ScriptEngine] { + override def initialValue() = createScriptEngine() + } + + private case class Mapped(nargs: Int, funcBody: String, var threadIds: List[Long]) + private val funcs = new mutable.HashMap[String, Mapped] + + // throws javax.script.ScriptException when funcBody is invalid. 
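  // For illustration (hypothetical UDF): after
  //   JavaScriptUDFManager.register("addOne", 1, "function addOne(x) { return x + 1; }")
  // a subsequent
  //   JavaScriptUDFManager.call[Double]("addOne", Double.box(41))
  // evaluates the function in the calling thread's engine and yields roughly
  // Some(42.0), or None if the invocation throws.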
+ def register(funcName: String, nargs: Int, funcBody: String): Unit = { + val engine = getScriptEngine() + val threadId = Thread.currentThread.getId + + funcs.synchronized { + def overwriteFunc(): Unit = { + funcs += (funcName -> Mapped(nargs, funcBody, List(threadId))) + } + + funcs.get(funcName) match { + case None => + overwriteFunc() + case Some(m) if funcBody != m.funcBody => + overwriteFunc() + + case Some(m) => + if (m.threadIds.contains(threadId)) + return + m.threadIds = threadId :: m.threadIds + } + engine.eval(funcBody) + } + } + + private def invoke(funcName: String, args: AnyRef*): AnyRef = { + val inv = getInvocableEngine() + inv.invokeFunction(funcName, args: _*) + } + + def call[T](funcName: String, args: AnyRef*): Option[T] = { + Try { + invoke(funcName, args:_*).asInstanceOf[T] + } match { + case Success(value) => Some(value) + case Failure(err) => None + } + } + + def tryCall[T](funcName: String, args: AnyRef*): Try[T] = Try { + invoke(funcName, args:_*).asInstanceOf[T] + } + + def registerAndCall[T](funcName: String, nargs: Int, funcBody: String, args: AnyRef*): Option[T] = { + register(funcName, nargs, funcBody) + call[T](funcName, args:_*) + } + + def registerAndTryCall[T](funcName: String, nargs: Int, funcBody: String, args: AnyRef*): Try[T] = { + register(funcName, nargs, funcBody) + tryCall[T](funcName, args:_*) + } + + def getNumberOfArgsByFunctionName(fname: String): Option[Int] = funcs.synchronized { + funcs.get(fname).map(_.nargs) + } + + // This method is required because Rhino may return ConsString (!= java.lang.String) + def asScala(x: AnyRef) = { + val inv = getInvocableEngine + inv.invokeMethod(JavaScriptHelpers, "javaScriptToScala", x) + } + + private def getScriptEngine(): ScriptEngine = jsEngines.get + + private def getInvocableEngine(): Invocable = { + getScriptEngine().asInstanceOf[Invocable] + } + + private def createScriptEngine(): ScriptEngine = { + var engine: ScriptEngine = null + scriptEngineManager.synchronized { + engine = scriptEngineManager.getEngineByName("JavaScript") + } + if (engine == null) { + val threadId = Thread.currentThread.getId + throw new Exception("failed to create JavaScript engine in thread %d".format(threadId)) + } + engine.put("jql", JavaScriptHelpers) + + engine + } +} + +object JavaScriptUDFManager extends JavaScriptUDFManager + +object JavaScriptFeatureFunctionManager extends JavaScriptUDFManager { + def callAndGetValues(funcName: String, args: AnyRef*): Map[String, Any] = { + tryCall[java.util.Map[String, AnyRef]](funcName, args:_*) match { + case Success(m) => + JavaConversions.mapAsScalaMap(m).toMap.mapValues(asScala) + case Failure(err) => + throw err + } + } +} diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLAST.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLAST.scala index eb36315..5bf3690 100644 --- a/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLAST.scala +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLAST.scala @@ -15,8 +15,10 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA package us.jubat.jubaql_server.processor +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.expressions.Expression -sealed abstract trait JubaQLAST +sealed trait JubaQLAST case class CreateDatasource(sourceName: String, @@ -27,21 +29,49 @@ CreateDatasource(sourceName: String, case class CreateModel(algorithm: String, modelName: String, - configJson: String, - 
specifier: List[(String, List[String])]) extends JubaQLAST { - override def toString: String = "CreateModel(%s,%s,%s,%s)".format( + labelOrId: Option[(String, String)], + featureExtraction: List[(FeatureFunctionParameters, String)], + configJson: String) extends JubaQLAST { + override def toString: String = "CreateModel(%s,%s,%s,%s,%s)".format( algorithm, modelName, + labelOrId, + featureExtraction, if (configJson.size > 13) configJson.take(5) + "..." + configJson.takeRight(5) - else configJson, - specifier + else configJson ) } case class Update(modelName: String, rpcName: String, source: String) extends JubaQLAST +case class CreateStreamFromSelect(streamName: String, selectPlan: LogicalPlan) extends JubaQLAST + +case class CreateStreamFromAnalyze(streamName: String, analyze: Analyze, newColumn: Option[String]) extends JubaQLAST + +case class CreateTrigger(dsName: String, condition: Option[Expression], expr: Expression) extends JubaQLAST + +case class CreateStreamFromSlidingWindow(streamName: String, windowSize: Int, slideInterval: Int, + windowType: String, source: LogicalPlan, + funcSpecs: List[(String, List[Expression], Option[String])], + postCond: Option[Expression]) extends JubaQLAST + case class Analyze(modelName: String, rpcName: String, data: String) extends JubaQLAST +case class LogStream(streamName: String) extends JubaQLAST + +case class Status() extends JubaQLAST + case class Shutdown() extends JubaQLAST +case class StartProcessing(dsName: String) extends JubaQLAST + case class StopProcessing() extends JubaQLAST + +case class CreateFunction(funcName: String, args: List[(String, String)], + returnType: String, lang: String, body: String) extends JubaQLAST + +case class CreateFeatureFunction(funcName: String, args: List[(String, String)], + lang: String, body: String) extends JubaQLAST + +case class CreateTriggerFunction(funcName: String, args: List[(String, String)], + lang: String, body: String) extends JubaQLAST diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/json/ClassifierPrediction.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLContext.scala similarity index 79% rename from processor/src/main/scala/us/jubat/jubaql_server/processor/json/ClassifierPrediction.scala rename to processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLContext.scala index 89ffe04..19fa6e4 100644 --- a/processor/src/main/scala/us/jubat/jubaql_server/processor/json/ClassifierPrediction.scala +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLContext.scala @@ -13,6 +13,9 @@ // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -package us.jubat.jubaql_server.processor.json +package us.jubat.jubaql_server.processor -case class ClassifierPrediction(label: String, score: Double) +import org.apache.spark.SparkContext +import org.apache.spark.sql.SQLContext + +class JubaQLContext(sc: SparkContext, @transient val parser: JubaQLParser) extends SQLContext(sc) diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLParser.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLParser.scala index 64c9a5d..6156c1c 100644 --- a/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLParser.scala +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLParser.scala @@ -15,12 +15,63 @@ // Foundation, Inc., 51 Franklin 
Street, Fifth Floor, Boston, MA 02110-1301 USA package us.jubat.jubaql_server.processor -import org.apache.spark.sql.catalyst.SqlParser +import org.apache.spark.sql.catalyst.types.BooleanType +import org.apache.spark.sql.catalyst.{SqlLexical, SqlParser} +import org.apache.spark.sql.catalyst.analysis.{Star, UnresolvedAttribute, UnresolvedRelation} +import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import com.typesafe.scalalogging.slf4j.LazyLogging +import scala.util.parsing.input.CharArrayReader._ + +// TODO: move these to a proper file. +// TODO: rename to better ones. +sealed trait FeatureFunctionParameters + +case object WildcardAnyParameter extends FeatureFunctionParameters + +case class WildcardWithPrefixParameter(prefix: String) extends FeatureFunctionParameters + +case class WildcardWithSuffixParameter(suffix: String) extends FeatureFunctionParameters + +case class NormalParameters(params: List[String]) extends FeatureFunctionParameters + + class JubaQLParser extends SqlParser with LazyLogging { + class JubaQLLexical(keywords: Seq[String]) extends SqlLexical(keywords) { + case class CodeLit(chars: String) extends Token { + override def toString = "$$"+chars+"$$" + } + + // used for parsing $$-delimited code blocks + protected lazy val codeDelim: Parser[String] = '$' ~ '$' ^^ + { case a ~ b => "$$" } + + protected lazy val stringWithoutCodeDelim: Parser[String] = rep1( chrExcept('$', EofCh) ) ^^ + { case chars => chars mkString "" } + + protected lazy val codeContents: Parser[String] = repsep(stringWithoutCodeDelim, '$') ^^ + { case words => words mkString "$" } + + override lazy val token: Parser[Token] = + ( identChar ~ rep( identChar | digit ) ^^ { case first ~ rest => processIdent(first :: rest mkString "") } + | rep1(digit) ~ opt('.' ~> rep(digit)) ^^ { + case i ~ None => NumericLit(i mkString "") + case i ~ Some(d) => FloatLit(i.mkString("") + "." 
+ d.mkString("")) + } + | '\'' ~ rep( chrExcept('\'', EofCh) ) ~ '\'' ^^ { case '\'' ~ chars ~ '\'' => StringLit(chars mkString "") } + | '\"' ~ rep( chrExcept('\"', EofCh) ) ~ '\"' ^^ { case '\"' ~ chars ~ '\"' => StringLit(chars mkString "") } + | codeDelim ~> codeContents <~ codeDelim ^^ { case chars => CodeLit(chars) } + | EofCh ^^^ EOF + | codeDelim ~> failure("unclosed code literal") + | '\'' ~> failure("unclosed string literal") + | '\"' ~> failure("unclosed string literal") + | delim + | failure("illegal character") + ) + } + protected lazy val CREATE = Keyword("CREATE") protected lazy val DATASOURCE = Keyword("DATASOURCE") protected lazy val MODEL = Keyword("MODEL") @@ -32,19 +83,70 @@ class JubaQLParser extends SqlParser with LazyLogging { protected lazy val ANALYZE = Keyword("ANALYZE") protected lazy val USING = Keyword("USING") protected lazy val DATA = Keyword("DATA") + protected lazy val LOG = Keyword("LOG") protected lazy val STORAGE = Keyword("STORAGE") protected lazy val STREAM = Keyword("STREAM") - protected lazy val config = Keyword("config") + protected lazy val CONFIG = Keyword("CONFIG") protected lazy val numeric = Keyword("numeric") protected lazy val string = Keyword("string") protected lazy val boolean = Keyword("boolean") + protected lazy val STATUS = Keyword("STATUS") protected lazy val SHUTDOWN = Keyword("SHUTDOWN") + protected lazy val START = Keyword("START") protected lazy val STOP = Keyword("STOP") protected lazy val PROCESSING = Keyword("PROCESSING") + protected lazy val FUNCTION = Keyword("FUNCTION") + protected lazy val RETURNS = Keyword("RETURNS") + protected lazy val LANGUAGE = Keyword("LANGUAGE") + protected lazy val FEATURE = Keyword("FEATURE") + protected lazy val TRIGGER = Keyword("TRIGGER") + protected lazy val FOR = Keyword("FOR") + protected lazy val EACH = Keyword("EACH") + protected lazy val ROW = Keyword("ROW") + protected lazy val EXECUTE = Keyword("EXECUTE") + protected lazy val SLIDING = Keyword("SLIDING") + protected lazy val WINDOW = Keyword("WINDOW") + protected lazy val SIZE = Keyword("SIZE") + protected lazy val ADVANCE = Keyword("ADVANCE") + protected lazy val TIME = Keyword("TIME") + protected lazy val TUPLES = Keyword("TUPLES") + protected lazy val OVER = Keyword("OVER") + + override val lexical = new JubaQLLexical(reservedWords) + + // we should allow some common column names that have are also known as keywords + protected lazy val colIdent = (COUNT | TIME | STATUS | MODEL | GROUP | + ORDER | ident) + + override lazy val baseExpression: PackratParser[Expression] = + expression ~ "[" ~ expression <~ "]" ^^ { + case base ~ _ ~ ordinal => GetItem(base, ordinal) + } | + TRUE ^^^ Literal(true, BooleanType) | + FALSE ^^^ Literal(false, BooleanType) | + cast | + "(" ~> expression <~ ")" | + function | + "-" ~> literal ^^ UnaryMinus | + colIdent ^^ UnresolvedAttribute | // was: ident + "*" ^^^ Star(None) | + literal + + override lazy val projection: Parser[Expression] = + expression ~ (opt(AS) ~> opt(colIdent)) ^^ { // was: opt(ident) + case e ~ None => e + case e ~ Some(a) => Alias(e, a)() + } + + protected lazy val streamIdent = ident + + protected lazy val modelIdent = ident + + protected lazy val funcIdent = ident // column_name column_type protected lazy val stringPairs: Parser[(String, String)] = { - ident ~ (numeric | string | boolean) ^^ { + colIdent ~ (numeric | string | boolean) ^^ { case x ~ y => (x, y) } } @@ -62,7 +164,7 @@ class JubaQLParser extends SqlParser with LazyLogging { // CREATE DATASOURCE source_name ( column_name 
data_type, [...]) FROM sink_id protected lazy val createDatasource: Parser[JubaQLAST] = { - CREATE ~ DATASOURCE ~> ident ~ opt("(" ~ rep1sep(stringPairs, ",") ~ ")") ~ + CREATE ~ DATASOURCE ~> streamIdent ~ opt("(" ~ rep1sep(stringPairs, ",") ~ ")") ~ FROM ~ "(" ~ STORAGE ~ ":" ~ stringLit ~ opt(streamList) <~ ")" ^^ { case sourceName ~ rep ~ _ /*FROM*/ ~ _ ~ _ /*STORAGE*/ ~ _ ~ storage ~ streams => rep match { @@ -80,64 +182,141 @@ } } - protected lazy val createWith: Parser[(String, List[String])] = { - ident ~ ":" ~ stringLit ^^ { - case key ~ _ ~ value => - (key, List(value)) - } | - ident ~ ":" ~ "[" ~ rep1sep(stringLit, ",") <~ "]" ^^ { - case key ~ _ ~ _ ~ values => - (key, values) - } + protected lazy val createModel: Parser[JubaQLAST] = { + val wildcardAny: Parser[FeatureFunctionParameters] = "*" ^^ { + case _ => + WildcardAnyParameter + } + val wildcardWithPrefixParam: Parser[FeatureFunctionParameters] = ident <~ "*" ^^ { + case prefix => + WildcardWithPrefixParameter(prefix) + } + val wildcardWithSuffixParam: Parser[FeatureFunctionParameters] = "*" ~> ident ^^ { + case suffix => + WildcardWithSuffixParameter(suffix) + } + // wildcardWithSuffixParam must come first. + // If wildcardAny preceded it, *_suffix would always match wildcardAny. + val wildcard: Parser[FeatureFunctionParameters] = wildcardWithSuffixParam | wildcardAny | wildcardWithPrefixParam + + val oneParameter: Parser[NormalParameters] = colIdent ^^ { + case param => + NormalParameters(List(param)) + } + // this may take one parameter. Should such behavior be avoided? + val moreThanOneParameters: Parser[FeatureFunctionParameters] = "(" ~> rep1sep(colIdent, ",") <~ ")" ^^ { + case params => + NormalParameters(params) + } + + val featureFunctionParameters: Parser[FeatureFunctionParameters] = wildcard | oneParameter | moreThanOneParameters + + val labelOrId: Parser[(String, String)] = "(" ~> ident ~ ":" ~ colIdent <~ ")" ^^ { + case labelOrId ~ _ ~ value if labelOrId == "label" || labelOrId == "id" => + (labelOrId, value) + } + + val paramsAndFunction: Parser[(FeatureFunctionParameters, String)] = featureFunctionParameters ~ opt(WITH ~> funcIdent) ^^ { + case params ~ functionName => + (params, functionName.getOrElse("id")) + } + + CREATE ~> jubatusAlgorithm ~ MODEL ~ modelIdent ~ opt(labelOrId) ~ AS ~ + rep1sep(paramsAndFunction, ",") ~ CONFIG ~ stringLit ^^ { + case algorithm ~ _ ~ modelName ~ maybeLabelOrId ~ _ ~ l ~ _ ~ config => + CreateModel(algorithm, modelName, maybeLabelOrId, l, config) + } } - // CREATE algorithm_name MODEL jubatus_name WITH config = "json string" - protected lazy val createModel: Parser[JubaQLAST] = { - CREATE ~> jubatusAlgorithm ~ MODEL ~ ident ~ WITH ~ "(" ~ opt(rep1sep(createWith, ",")) ~ ")" ~ "config" ~ "=" ~ stringLit ^^ { - case algorithm ~ _ ~ modelName ~ _ /*with*/ ~ _ ~ cwith ~ _ ~ _ /*config*/ ~ _ ~ config => - CreateModel(algorithm, modelName, config, cwith.getOrElse(List[(String, List[String])]())) - } - } - - // This select copied from SqlParser, and removed `from` clause.
- protected lazy val jubaqlSelect: Parser[LogicalPlan] = - SELECT ~> opt(DISTINCT) ~ projections ~ - opt(filter) ~ - opt(grouping) ~ - opt(having) ~ - opt(orderBy) ~ - opt(limit) <~ opt(";") ^^ { - case d ~ p ~ f ~ g ~ h ~ o ~ l => - val base = NoRelation + protected lazy val createStreamFromSelect: Parser[JubaQLAST] = { + CREATE ~ STREAM ~> streamIdent ~ FROM ~ select ^^ { + case streamName ~ _ ~ selectPlan => + CreateStreamFromSelect(streamName, selectPlan) + } + } + + protected lazy val createStreamFromAnalyze: Parser[JubaQLAST] = { + CREATE ~ STREAM ~> streamIdent ~ FROM ~ analyzeStream ~ opt(AS ~> colIdent) ^^ { + case streamName ~ _ ~ analyzePlan ~ newColumn => + CreateStreamFromAnalyze(streamName, analyzePlan, newColumn) + } + } + + protected lazy val createTrigger: Parser[JubaQLAST] = { + CREATE ~ TRIGGER ~ ON ~> streamIdent ~ FOR ~ EACH ~ ROW ~ opt(WHEN ~> expression) ~ EXECUTE ~ function ^^ { + case dsName ~ _ ~ _ ~ _ ~ condition ~ _ ~ expr => + CreateTrigger(dsName, condition, expr) + } + } + + protected lazy val createStreamFromSlidingWindow: Parser[JubaQLAST] = { + val aggregation: Parser[(String, List[Expression], Option[String])] = + (ident | AVG) ~ "(" ~ rep1sep(expression, ",") ~ ")" ~ opt(AS ~> colIdent) ^^ { + case funcName ~ _ ~ parameters ~ _ ~ maybeAlias => + (funcName, parameters, maybeAlias) + } + val aggregationList = rep1sep(aggregation, ",") + + val filter: Parser[Expression] = WHERE ~ expression ^^ { case _ ~ e => e} + val having: Parser[Expression] = HAVING ~> expression + + CREATE ~ STREAM ~> streamIdent ~ FROM ~ SLIDING ~ WINDOW ~ + "(" ~ SIZE ~ numericLit ~ ADVANCE ~ numericLit ~ (TIME | TUPLES) ~ ")" ~ + OVER ~ streamIdent ~ WITH ~ aggregationList ~ opt(filter) ~ opt(having) ^^ { + case streamName ~ _ ~ _ ~ _ ~ _ ~ _ /* FROM SLIDING WINDOW ( SIZE */ ~ + size ~ _ /* ADVANCE */ ~ advance ~ windowType ~ _ /* ) */ ~ + _ /* OVER */ ~ source ~ _ /* WITH */ ~ funcSpecs ~ f ~ h => + // start from a table/stream with the given name + val base = UnresolvedRelation(Seq(source), None) + // apply the precondition val withFilter = f.map(f => Filter(f, base)).getOrElse(base) - val withProjection = - g.map { - g => - Aggregate(g, assignAliases(p), withFilter) - }.getOrElse(Project(assignAliases(p), withFilter)) - val withDistinct = d.map(_ => Distinct(withProjection)).getOrElse(withProjection) - val withHaving = h.map(h => Filter(h, withDistinct)).getOrElse(withDistinct) - val withOrder = o.map(o => Sort(o, withHaving)).getOrElse(withHaving) - val withLimit = l.map { - l => Limit(l, withOrder) - }.getOrElse(withOrder) - withLimit + // select only the column that we use in the window. + val allColumns = funcSpecs.map(_._2.last) + val withProjection = Project(assignAliases(allColumns), withFilter) + // NB. we have to add a Cast to the correct type in every column later, + // after we have mapped function names to concrete functions. 
+ + CreateStreamFromSlidingWindow(streamName, size.toInt, advance.toInt, + windowType.toLowerCase, withProjection, funcSpecs, + h) } + } + + protected lazy val logStream: Parser[JubaQLAST] = { + LOG ~ STREAM ~> streamIdent ^^ { + case streamName => + LogStream(streamName) + } + } protected lazy val update: Parser[JubaQLAST] = { - UPDATE ~ MODEL ~> ident ~ USING ~ ident ~ FROM ~ ident ^^ { + UPDATE ~ MODEL ~> modelIdent ~ USING ~ funcIdent ~ FROM ~ streamIdent ^^ { case modelName ~ _ ~ rpcName ~ _ ~ source => Update(modelName, rpcName, source) } } protected lazy val analyze: Parser[JubaQLAST] = { - ANALYZE ~> stringLit ~ BY ~ MODEL ~ ident ~ USING ~ ident ^^ { + ANALYZE ~> stringLit ~ BY ~ MODEL ~ modelIdent ~ USING ~ funcIdent ^^ { case data ~ _ ~ _ ~ modelName ~ _ ~ rpc => Analyze(modelName, rpc, data) } } + protected lazy val analyzeStream: Parser[Analyze] = { + ANALYZE ~> streamIdent ~ BY ~ MODEL ~ modelIdent ~ USING ~ funcIdent ^^ { + case source ~ _ ~ _ ~ modelName ~ _ ~ rpc => + Analyze(modelName, rpc, source) + } + } + + protected lazy val status: Parser[JubaQLAST] = { + STATUS ^^ { + case _ => + Status() + } + } + protected lazy val shutdown: Parser[JubaQLAST] = { SHUTDOWN ^^ { case _ => @@ -145,6 +324,13 @@ class JubaQLParser extends SqlParser with LazyLogging { } } + protected lazy val startProcessing: Parser[JubaQLAST] = { + START ~ PROCESSING ~> streamIdent ^^ { + case dsName => + StartProcessing(dsName) + } + } + protected lazy val stopProcessing: Parser[JubaQLAST] = { STOP ~> PROCESSING ^^ { case _ => @@ -152,24 +338,66 @@ class JubaQLParser extends SqlParser with LazyLogging { } } + /** A parser which matches a code literal */ + def codeLit: Parser[String] = + elem("code literal", _.isInstanceOf[lexical.CodeLit]) ^^ (_.chars) + + protected lazy val createFunction: Parser[JubaQLAST] = { + CREATE ~ FUNCTION ~> funcIdent ~ "(" ~ repsep(stringPairs, ",") ~ ")" ~ + RETURNS ~ (numeric | string| boolean) ~ LANGUAGE ~ ident ~ AS ~ codeLit ^^ { + case f ~ _ ~ args ~ _ ~ _ /*RETURNS*/ ~ retType ~ _ /*LANGUAGE*/ ~ lang ~ + _ /*AS*/ ~ body => + CreateFunction(f, args, retType, lang, body) + } + } + + protected lazy val createFeatureFunction: Parser[JubaQLAST] = { + CREATE ~ FEATURE ~ FUNCTION ~> funcIdent ~ "(" ~ repsep(stringPairs, ",") ~ ")" ~ + LANGUAGE ~ ident ~ AS ~ codeLit ^^ { + case f ~ _ ~ args ~ _ ~ _ /*LANGUAGE*/ ~ lang ~ + _ /*AS*/ ~ body => + CreateFeatureFunction(f, args, lang, body) + } + } + + protected lazy val createTriggerFunction: Parser[JubaQLAST] = { + CREATE ~ TRIGGER ~ FUNCTION ~> funcIdent ~ "(" ~ repsep(stringPairs, ",") ~ ")" ~ + LANGUAGE ~ ident ~ AS ~ codeLit ^^ { + case f ~ _ ~ args ~ _ ~ _ /*LANGUAGE*/ ~ lang ~ + _ /*AS*/ ~ body => + CreateTriggerFunction(f, args, lang, body) + } + } + protected lazy val jubaQLQuery: Parser[JubaQLAST] = { createDatasource | createModel | + createStreamFromSelect | + createStreamFromSlidingWindow | + createStreamFromAnalyze | + createTrigger | + logStream | update | analyze | + status | shutdown | - stopProcessing + startProcessing | + stopProcessing | + createFunction | + createFeatureFunction | + createTriggerFunction } // note: apply cannot override incompatible type with parent class //override def apply(input: String): Option[JubaQLAST] = { def parse(input: String): Option[JubaQLAST] = { + logger.info(s"trying to parse '$input'") phrase(jubaQLQuery)(new lexical.Scanner(input)) match { case Success(r, q) => - logger.debug(s"successfully parsed '$input' into $r") + logger.debug(s"successfully parsed input: $r") Option(r) 
case x => - logger.warn(s"failed to parse '$input' as JubaQL") + logger.warn(s"failed to parse input as JubaQL: $x") None } } diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLPatternLayout.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLPatternLayout.scala new file mode 100644 index 0000000..fc8d759 --- /dev/null +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLPatternLayout.scala @@ -0,0 +1,46 @@ +// Jubatus: Online machine learning framework for distributed environment +// Copyright (C) 2014-2015 Preferred Networks and Nippon Telegraph and Telephone Corporation. +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License version 2.1 as published by the Free Software Foundation. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +package us.jubat.jubaql_server.processor + +import java.net.InetAddress + +import org.apache.log4j.PatternLayout +import org.apache.log4j.helpers.{PatternConverter, PatternParser} +import org.apache.log4j.spi.LoggingEvent + +class JubaQLPatternLayout extends PatternLayout { + val hostname = InetAddress.getLocalHost().getHostName + + override def createPatternParser(pattern: String): PatternParser = { + new PatternParser(pattern) { + override def finalizeConverter(c: Char): Unit = { + c match { + // add a new 'h' pattern to the conversion string + case 'h' => + val pc = new PatternConverter { + override def convert(event: LoggingEvent): String = { + hostname + } + } + addConverter(pc) + // all other characters are handled by the original pattern parser + case other => + super.finalizeConverter(other) + } + } + } + } +} diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLProcessor.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLProcessor.scala index 67a5c16..c6a3ee9 100644 --- a/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLProcessor.scala +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLProcessor.scala @@ -20,7 +20,7 @@ import java.net.InetAddress import com.typesafe.scalalogging.slf4j.LazyLogging import org.apache.spark.SparkContext import com.twitter.finagle.{Http, Service} -import com.twitter.util.Await +import com.twitter.util.{Duration, Time, Await} import org.jboss.netty.handler.codec.http._ import sun.misc.{SignalHandler, Signal} @@ -63,6 +63,13 @@ object JubaQLProcessor extends LazyLogging { } logger.debug(s"Starting JubaQLProcessor in run mode $runMode") + // checkpointDir for Spark + val checkpointDir = scala.util.Properties.propOrElse("jubaql.checkpointdir", "") + if (checkpointDir.trim.isEmpty) { + logger.error("No jubaql.checkpointdir property") + System.exit(1) + } + // When run through spark-submit, the Java system property "spark.master" // will contain the master passed to spark-submit and we *must* use the // same; otherwise use "local[3]". 
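Note on the new JubaQLPatternLayout above: it registers a custom 'h' conversion character that log4j expands to the local hostname, so each log line can be attributed to the machine that produced it. Below is a minimal sketch of how the layout could be exercised, assuming the standard log4j 1.x API; the object name PatternLayoutExample is hypothetical, and presumably the layout is actually wired up via the log4j XML configuration rather than in code:

import org.apache.log4j.{ConsoleAppender, Logger}
import us.jubat.jubaql_server.processor.JubaQLPatternLayout

object PatternLayoutExample {
  def main(args: Array[String]): Unit = {
    // '%h' is the conversion character added by JubaQLPatternLayout;
    // all other pattern characters are handled by the stock PatternParser.
    val layout = new JubaQLPatternLayout()
    layout.setConversionPattern("%d %h [%t] %-5p %c - %m%n")
    Logger.getRootLogger.addAppender(new ConsoleAppender(layout))
    Logger.getRootLogger.info("hello from the JubaQL processor")
  }
}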
@@ -73,7 +80,7 @@ object JubaQLProcessor extends LazyLogging { val sc = new SparkContext(master, "JubaQL Processor") // start HTTP interface - val service: Service[HttpRequest, HttpResponse] = new JubaQLService(sc, runMode) + val service: Service[HttpRequest, HttpResponse] = new JubaQLService(sc, runMode, checkpointDir) val errorHandler = new HandleExceptions logger.info("JubaQLProcessor HTTP server starting") val server = Http.serve(":*", errorHandler andThen service) @@ -101,7 +108,10 @@ object JubaQLProcessor extends LazyLogging { unregister(regHandler) isRegistered = false } - Await.result(server.close()) + // close HTTP server only after a short timeout to finish requests + // (otherwise sometimes the response to a SHUTDOWN command won't + // arrive at the client) + Await.result(server.close(Time.now + Duration.fromSeconds(5))) } } diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLService.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLService.scala index 184ab5f..ed80a19 100644 --- a/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLService.scala +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/JubaQLService.scala @@ -16,60 +16,104 @@ package us.jubat.jubaql_server.processor import java.net.InetAddress +import java.text.SimpleDateFormat +import java.util.Date import java.util.concurrent.ConcurrentHashMap import com.twitter.finagle.Service import com.twitter.util.{Future => TwFuture, Promise => TwPromise} import com.typesafe.scalalogging.slf4j.LazyLogging import io.netty.util.CharsetUtil -import us.jubat.jubaql_server.processor.json.{AnomalyScore, ClassifierPrediction, ClassifierResult, DatumResult} -import us.jubat.jubaql_server.processor.updater.{Anomaly, Classifier, Recommender} -import org.apache.spark.SparkContext -import org.apache.spark.sql.SQLContext +import RunMode.{Production, Development} +import us.jubat.jubaql_server.processor.json._ +import us.jubat.jubaql_server.processor.updater._ +import org.apache.spark.{SparkFiles, SparkContext} +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation} +import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Row} +import org.apache.spark.sql.catalyst.plans.logical.{Project, BinaryNode, LogicalPlan, UnaryNode} import org.apache.spark.sql.catalyst.types._ +import org.apache.spark.sql.{SQLContext, SchemaRDD} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.streaming.StreamingContext._ +import org.apache.spark.streaming.dstream.DStream import org.jboss.netty.buffer.ChannelBuffers import org.jboss.netty.handler.codec.http._ import org.json4s._ import org.json4s.native.{JsonMethods, Serialization} +import org.json4s.JsonDSL._ +import sun.misc.Signal import us.jubat.anomaly.AnomalyClient import us.jubat.classifier.ClassifierClient import us.jubat.common.Datum import us.jubat.recommender.RecommenderClient -import us.jubat.yarn.client.{JubatusYarnApplicationStatus, JubatusYarnApplication, Resource} +import us.jubat.yarn.client.{JubatusYarnApplication, JubatusYarnApplicationStatus, Resource} import us.jubat.yarn.common.{LearningMachineType, Location} import scala.collection._ import scala.collection.convert.decorateAsScala._ -import scala.concurrent.{Future => ScFuture, Promise => ScPromise, Await => ScAwait, SyncVar} -import scala.concurrent.duration._ import 
scala.concurrent.ExecutionContext.Implicits.global +import scala.concurrent.duration._ +import scala.concurrent.{Await => ScAwait, Future => ScFuture, Promise => ScPromise, SyncVar} import scala.util.{Failure, Random, Success, Try} -import sun.misc.Signal -class JubaQLService(sc: SparkContext, runMode: RunMode) +class JubaQLService(sc: SparkContext, runMode: RunMode, checkpointDir: String) extends Service[HttpRequest, HttpResponse] with LazyLogging { val random = new Random() val parser = new JubaQLParser() // alias name for parser is needed to override SQLContext's parser - val parserAlias = parser - val sqlc = new SQLContext(sc) { - override val parser = parserAlias - } - val sources: concurrent.Map[String, (HybridProcessor, StructType)] = - new ConcurrentHashMap[String, (HybridProcessor, StructType)]().asScala + val sqlc = new JubaQLContext(sc, parser) + + sqlc.registerFunction("highestScoreLabel", (classes: List[Row]) => { + // actually we have a List[(String, Double)], but we get a List[Row] + if (classes.isEmpty) + "" + else { + classes.maxBy(_.getDouble(1)).getString(0) + } + }) + + val sources: concurrent.Map[String, (HybridProcessor, Option[StructType])] = + new ConcurrentHashMap[String, (HybridProcessor, Option[StructType])]().asScala val models: concurrent.Map[String, (JubatusYarnApplication, CreateModel, LearningMachineType)] = new ConcurrentHashMap[String, (JubatusYarnApplication, CreateModel, LearningMachineType)]().asScala - val startedJubatusInstances: concurrent.Map[String, ScFuture[JubatusYarnApplication]] = - new ConcurrentHashMap[String, ScFuture[JubatusYarnApplication]]().asScala + val startedJubatusInstances: concurrent.Map[String, (ScFuture[JubatusYarnApplication], CreateModel, LearningMachineType)] = + new ConcurrentHashMap[String, (ScFuture[JubatusYarnApplication], CreateModel, LearningMachineType)]().asScala + + // hold all statements received from a client, together with the data source name + // TODO replace this by a synchronized version? + val preparedStatements: mutable.Queue[(String, PreparedJubaQLStatement)] = new mutable.Queue() + + // hold names of all usable table-like objects, mapping to their main data source name + val knownStreamNames: concurrent.Map[String, String] = + new ConcurrentHashMap[String, String]().asScala + + // hold feature functions written in JavaScript. + val featureFunctions: concurrent.Map[String, String] = + new ConcurrentHashMap[String, String]().asScala + + val builtinFeatureFunctions = Set("id") + + val jubatusFeatureFunctions = Set("unigram", "bigram") + + // a feature function is invalid if it is not in one of the three possible sets + def invalidFeatureFunctions(ffs: List[String]): Set[String] = { + ffs.toSet. + diff(featureFunctions.keySet). + diff(builtinFeatureFunctions). 
+ diff(jubatusFeatureFunctions) + } // set this flag to `false` to prevent the HTTP server from processing queries protected val isAcceptingQueries: SyncVar[Boolean] = new SyncVar() isAcceptingQueries.put(true) - // set this flag to `true` to signal to executors they should stop processing - protected val executorsShouldFinishProcessing: SyncVar[Boolean] = new SyncVar() - executorsShouldFinishProcessing.put(false) + // set this value which will be communicated to executors via /status poll + protected val driverStatusMessage: SyncVar[String] = new SyncVar() + driverStatusMessage.put("running") // store a function to stop the UPDATE process (if one is running) protected var stopUpdateFunc: Option[() => (ProcessingInformation, ProcessingInformation)] = None @@ -93,10 +137,8 @@ class JubaQLService(sc: SparkContext, runMode: RunMode) case "/status" => val resp = new DefaultHttpResponse(HttpVersion.HTTP_1_1, HttpResponseStatus.OK) - if (executorsShouldFinishProcessing.get == true) - resp.setContent(ChannelBuffers.copiedBuffer("shutdown", CharsetUtil.UTF_8)) - else - resp.setContent(ChannelBuffers.copiedBuffer("running", CharsetUtil.UTF_8)) + resp.setContent(ChannelBuffers.copiedBuffer(driverStatusMessage.get, + CharsetUtil.UTF_8)) TwFuture.value(resp) // if we get POSTed a statement, process it @@ -106,29 +148,30 @@ class JubaQLService(sc: SparkContext, runMode: RunMode) // create an empty promise and create the processing pipeline val command = new TwPromise[String] - // TODO: use Either or Future semantics to transport success/failure information - val result: TwFuture[Option[String]] = command.map(parseJson).map(_.flatMap(takeAction)) + val parsedCommand: TwFuture[Either[(Int, String), JubaQLAST]] = + command.map(parseJson) + val actionResult: TwFuture[Either[(Int, String), JubaQLResponse]] = + parsedCommand.map(_.right.flatMap(takeAction)) // now actually put the received command in the promise, // triggering the processing command.setValue(body) // create an HttpResponse based on the result - val responseFuture = result.map(res => { - // pick HTTP response code and body + val responseFuture = actionResult.map(res => { + implicit val formats = DefaultFormats + // pick HTTP response code and render JSON body val (resp, bodyJson) = res match { - case Some(msg) => + case Left((httpStatusCode, errMsg)) => + // there was an error in some inner function + logger.warn("error during query processing: " + errMsg) (new DefaultHttpResponse(HttpVersion.HTTP_1_1, - HttpResponseStatus.OK), - // msg may already be a JSON string - // TODO: get this type-safe - if (msg.startsWith("{") || msg.startsWith("[")) - "{\"result\": %s}".format(msg) - else - "{\"result\": \"%s\"}".format(msg)) - case _ => + HttpResponseStatus.valueOf(httpStatusCode)), + Serialization.write(ErrorMessage(errMsg))) + case Right(result) => + // we got a result that we can render as JSON (new DefaultHttpResponse(HttpVersion.HTTP_1_1, - HttpResponseStatus.INTERNAL_SERVER_ERROR), - "{\"result\": \"error\"}") + HttpResponseStatus.OK), + Serialization.write(result)) } // add header and body resp.addHeader("Content-Type", "application/json; charset=utf-8") @@ -137,7 +180,6 @@ class JubaQLService(sc: SparkContext, runMode: RunMode) resp.getStatus.getCode)) resp }) - logger.debug("[%s] request processing prepared".format(requestId)) responseFuture // return 404 in any other case @@ -149,48 +191,112 @@ class JubaQLService(sc: SparkContext, runMode: RunMode) } } - protected def parseJson(in: String): Option[JubaQLAST] = { + protected def 
parseJson(in: String): Either[(Int, String), JubaQLAST] = { // parse string and extract the "query" field JsonMethods.parseOpt(in).map(_ \ "query") match { case Some(JString(queryString)) => try { - parser.parse(queryString) + parser.parse(queryString) match { + case None => + val msg = s"unable to parse queryString '$queryString'" + logger.error(msg) + Left((400, msg)) + case Some(result) => + Right(result) + } } catch { case e: Throwable => - logger.error(s"unable to parse queryString '$queryString': " + e.getMessage) - None + Left((400, s"unable to parse queryString '$queryString': " + e.getMessage)) } case Some(other) => - logger.warn(s"received JSON '$in' did not contain a query string") - None + val msg = s"received JSON '$in' did not contain a query string" + logger.warn(msg) + Left((400, msg)) + case None => + val msg = s"received string '$in' was not valid JSON" + logger.warn(msg) + Left((400, msg)) + } + } + + // takes a JSON-shaped string describing a Jubatus config and adds a + // default "converter" part if it is not present + protected def complementInputJson(inputJsonString: String): Either[(Int, String), JObject] = { + val defaultConverter = JObject( + "converter" -> JObject( + "num_filter_types" -> JObject(), + "num_filter_rules" -> JArray(Nil), + "string_filter_types" -> JObject(), + "string_filter_rules" -> JArray(Nil), + "num_types" -> JObject(), + "num_rules" -> JArray(JObject("key" -> "*", "type" -> "num") :: Nil), + // define two Jubatus-internal conversion methods + "string_types" -> JObject("unigram" -> JObject("method" -> "ngram", "char_num" -> "1"), + "bigram" -> JObject("method" -> "ngram", "char_num" -> "2")), + "string_rules" -> JArray( + // define rules how to recognize keys for internal conversion + JObject("key" -> "*-unigram-jubaconv", "type" -> "unigram", "sample_weight" -> "tf", "global_weight" -> "bin") :: + JObject("key" -> "*-bigram-jubaconv", "type" -> "bigram", "sample_weight" -> "tf", "global_weight" -> "bin") :: + JObject("key" -> "*", "except" -> "*-jubaconv", "type" -> "str", "sample_weight" -> "tf", "global_weight" -> "bin") :: Nil))) + + JsonMethods.parseOpt(inputJsonString) match { + case Some(obj: JObject) => + obj.values.get("converter") match { + case None => + // if the input has no converter, then append the default one + Right(obj ~ defaultConverter) + case _ => + // if the input *does* have a converter, use it as is + Right(obj) + } + + case Some(_) => + Left((400, "input config is not a JSON object.")) + case None => - logger.warn(s"received string '$in' was not valid JSON") - None + Left((400, "input config is not a JSON.")) } } - protected def takeAction(ast: JubaQLAST): Option[String] = { + protected def takeAction(ast: JubaQLAST): Either[(Int, String), JubaQLResponse] = { ast match { case anything if isAcceptingQueries.get == false => - logger.warn(s"received $anything while shutting down, not taking action") - // propagate message to client - None + val msg = s"received $anything while shutting down, not taking action" + logger.warn(msg) + Left((503, msg)) case cd: CreateDatasource => - val processor = new HybridProcessor(sc, sqlc, cd.sinkStorage, cd.sinkStreams) - // TODO schema must be optional - val schema = StructType(cd.columns.map { - case (colName, dataType) => { - StructField(colName, dataType.toLowerCase match { - case "numeric" => LongType - case "string" => StringType - case "boolean" => BooleanType - case _ => ??? 
- }, false) + if (knownStreamNames.contains(cd.sourceName)) { + val msg = "data source '%s' already exists".format(cd.sourceName) + logger.warn(msg) + Left((400, msg)) + } else { + val processor = new HybridProcessor(sc, sqlc, + cd.sinkStorage, cd.sinkStreams, + runMode, + checkpointDir) + val maybeSchema = cd.columns match { + case Nil => + None + case cols => + Some(StructType(cols.map { + case (colName, dataType) => { + StructField(colName, dataType.toLowerCase match { + case "numeric" => DoubleType + case "string" => StringType + case "boolean" => BooleanType + case _ => ??? + }, nullable = false) + } + })) } - }) - sources.put(cd.sourceName, (processor, schema)) - Some("CREATE DATASOURCE") + // register this datasource internally so subsequent statements + // can look it up + sources.put(cd.sourceName, (processor, maybeSchema)) + // data sources "point" to themselves + knownStreamNames += ((cd.sourceName, cd.sourceName)) + Right(StatementProcessed("CREATE DATASOURCE")) + } case cm: CreateModel => val jubaType: LearningMachineType = cm.algorithm match { @@ -201,6 +307,22 @@ class JubaQLService(sc: SparkContext, runMode: RunMode) case "RECOMMENDER" => LearningMachineType.Recommender } + + // check if all feature functions exist + val badFFs = invalidFeatureFunctions(cm.featureExtraction.map(_._2)) + if (!badFFs.isEmpty) { + val msg = "unknown feature functions: " + badFFs.mkString(", ") + logger.warn(msg) + return Left((400, msg)) + } + + val configJsonStr: String = complementInputJson(cm.configJson) match { + case Left((errCode, errMsg)) => + return Left((errCode, errMsg)) + case Right(config) => + import JsonMethods._ + compact(render(config)) + } // TODO: location, resource val resource = Resource(priority = 0, memory = 256, virtualCores = 1) val juba: ScFuture[JubatusYarnApplication] = runMode match { @@ -208,118 +330,535 @@ class JubaQLService(sc: SparkContext, runMode: RunMode) val location = zookeeper.map { case (host, port) => Location(InetAddress.getByName(host), port) } - JubatusYarnApplication.start(cm.modelName, jubaType, location, cm.configJson, resource, 2) + JubatusYarnApplication.start(cm.modelName, jubaType, location, configJsonStr, resource, 2) case RunMode.Development => - LocalJubatusApplication.start(cm.modelName, jubaType, cm.configJson) + LocalJubatusApplication.start(cm.modelName, jubaType, configJsonStr) } // we keep a reference to the started instance so we can always check its status // and wait for it to come up if necessary val startedInstance = ScPromise[JubatusYarnApplication]() - startedJubatusInstances.put(cm.modelName, startedInstance.future) + startedJubatusInstances.put(cm.modelName, (startedInstance.future, cm, jubaType)) juba onComplete { case Success(j) => logger.info("CREATE MODEL succeeded") models.put(cm.modelName, (j, cm, jubaType)) startedInstance.completeWith(juba) case Failure(t) => - logger.info("CREATE MODEL failed") + logger.warn("CREATE MODEL failed: " + t.getMessage) t.printStackTrace() startedInstance.completeWith(juba) } - Some("CREATE MODEL (started)") + Right(StatementProcessed("CREATE MODEL (started)")) - case update: Update => - var model: JubatusYarnApplication = null - var jubaType: LearningMachineType = null - var cm: CreateModel = null - // wait until model is available (when Jubatus is started) or timeout - startedJubatusInstances.get(update.modelName).foreach(jubaFut => { - if (!jubaFut.isCompleted) { - logger.debug("waiting for model %s to come up".format(update.modelName)) - ScAwait.ready(jubaFut, 1 minute) - } - }) - val 
maybeModel = models.get(update.modelName) - maybeModel match { - case Some((s, c, ty)) => (s, c, ty) - model = s - cm = c - jubaType = ty - case None => - // TODO: error message - logger.error("model not found") - return None + case CreateStreamFromSelect(streamName, selectPlan) => + if (knownStreamNames.contains(streamName)) { + val msg = s"stream '$streamName' already exists" + logger.warn(msg) + Left((400, msg)) + } else { + val refStreams = selectPlan.children.flatMap(collectAllChildren) + withStreams(refStreams)(mainDataSource => { + // register this stream internally + knownStreamNames += ((streamName, mainDataSource)) + preparedStatements.enqueue((mainDataSource, PreparedCreateStreamFromSelect(streamName, + selectPlan, refStreams.toList))) + Right(StatementProcessed("CREATE STREAM")) + }) } - // Note: Theoretically it would as well be possible to address the jubatus - // instances directly by looking at `model.jubatusServers`. - val jubaHost = model.jubatusProxy.hostAddress - val jubaPort = model.jubatusProxy.port - val trainSpecifier = cm.specifier.toMap - val keys = trainSpecifier.get("datum") match { - case Some(list) if list.nonEmpty => list - case _ => ??? // TODO: throw exception. datum not specified + case CreateStreamFromSlidingWindow(streamName, windowSize, slideInterval, + windowType, source, funcSpecs, postCond) => + // pick the correct aggregate functions for the given aggregate list + val checkedFuncSpecs = funcSpecs.map { + case (funcName, params, alias) => + val maybeAggFun: Either[String, (SomeAggregateFunction[_])] = try { + funcName.toLowerCase match { + case "avg" => + AggregateFunctions.checkAvgParams(params) + case "stddev" => + AggregateFunctions.checkStdDevParams(params) + case "quantile" => + AggregateFunctions.checkQuantileParams(params) + case "linapprox" => + AggregateFunctions.checkLinApproxParams(params) + case "fourier" => + AggregateFunctions.checkFourierParams(params) + case "wavelet" => + AggregateFunctions.checkWaveletParams(params) + case "histogram" => + AggregateFunctions.checkHistogramParams(params) + case "concat" => + AggregateFunctions.checkConcatParams(params) + case "maxelem" => + AggregateFunctions.checkMaxElemParams(params) + case other => + Left("unknown aggregation function: " + other) + } + } catch { + case e: Throwable => + Left("error while checking " + funcName + ": " + + e.getMessage) + } + maybeAggFun match { + case Left(msg) => + Left(msg) + case Right(aggFun) => + Right((funcName, aggFun, alias)) + } + } + // check if we have any errors in the aggregate list + val errors = checkedFuncSpecs.collect { + case Left(msg) => msg } + if (errors.size > 0) { + val msg = "invalid parameter specification: " + errors.mkString(", ") + logger.warn(msg) + Left((400, msg)) + } else if (knownStreamNames.contains(streamName)) { + val msg = s"stream '$streamName' already exists" + logger.warn(msg) + Left((400, msg)) + } else { + val refStreams = source.children.flatMap(collectAllChildren) - val updater = jubaType match { - case LearningMachineType.Anomaly if update.rpcName == "add" => - new Anomaly(jubaHost, jubaPort, cm, keys) + withStreams(refStreams)(mainDataSource => { + // register this stream internally + knownStreamNames += ((streamName, mainDataSource)) + val flattenedFuncs = checkedFuncSpecs.collect{ case Right(x) => x } + // build the schema that will result from this statement + // (add one additional column with the window timestamp if the + // window is timestamp-based) + val typeInfo = flattenedFuncs.map(c => (c._1, c._2.outType, 
c._3)) + val schemaHead = if (windowType == "time") + StructField("jubaql_timestamp", StringType, nullable = false) :: Nil + else + Nil + val schema = StructType(schemaHead ++ typeInfo.zipWithIndex.map{ + case ((funcName, dataType, maybeAlias), idx) => + // if there was an AS given in the statement, fine. if not, + // use the function name (or function name + dollar + index + // if the same function is used multiple times). + val alias = maybeAlias.getOrElse({ + if (typeInfo.filter(f => f._3.isEmpty && f._1 == funcName).size > 1) + funcName + "$" + idx + else + funcName + }) + StructField(alias, dataType, nullable = false) + }) + // at this point, the `source` already has the pre-condition applied + // and the correct columns selected. however, we still need to add the + // right casts to Double/String. + val headColumns = if (windowType == "time") + Alias(Cast(UnresolvedAttribute("jubaql_timestamp"), StringType), + "key")() :: Nil + else + Nil + val projectedSource = source.asInstanceOf[Project] + val sourceWithCast = Project(headColumns ++ + projectedSource.projectList.zip(flattenedFuncs).map{ + case (a: Alias, funcDesc) => + Alias(Cast(a.child, funcDesc._2.inType), a.name)() + case (other, funcDesc) => + Alias(Cast(other, funcDesc._2.inType), other.name)() + }, projectedSource.child) + val functionObjects = flattenedFuncs.map(_._2) + preparedStatements.enqueue((mainDataSource, PreparedCreateStreamFromSlidingWindow(streamName, + windowSize, slideInterval, windowType, sourceWithCast, functionObjects, + schema, postCond))) + Right(StatementProcessed("CREATE STREAM")) + }) + } - case LearningMachineType.Classifier if update.rpcName == "train" => - val label = trainSpecifier.get("label") match { - case Some(la :: Nil) => la - case _ => ??? // TODO: throw exception + case cs: CreateStreamFromAnalyze => + val validCombination: (LearningMachineType, String) => Boolean = { + case (LearningMachineType.Anomaly, "calc_score") => true + case (LearningMachineType.Classifier, "classify") => true + case (LearningMachineType.Recommender, "complete_row_from_id") => true + case (LearningMachineType.Recommender, "complete_row_from_datum") => true + case _ => false + } + if (knownStreamNames.contains(cs.streamName)) { + val msg = "stream '%s' already exists".format(cs.streamName) + logger.warn(msg) + Left((400, msg)) + } else { + withStream(cs.analyze.data)(mainDataSource => { + prepareJubaClient(cs.analyze.modelName, cs.analyze.data, cs.analyze.rpcName, + validCombination) match { + case Right((modelFut, analyzerFut)) => + // register this stream internally + knownStreamNames += ((cs.streamName, mainDataSource)) + // put the UPDATE statement in the statement queue + preparedStatements.enqueue((mainDataSource, PreparedCreateStreamFromAnalyze(cs.streamName, + cs.analyze.modelName, modelFut, + cs.analyze.data, analyzerFut, cs.analyze.rpcName, + cs.newColumn))) + Right(StatementProcessed("CREATE STREAM")) + case Left((code, msg)) => + Left((code, msg)) } - new Classifier(jubaHost, jubaPort, cm, keys) + }) + } + + case CreateTrigger(dsName, condition, function) => + function match { + case f: UnresolvedFunction => + JavaScriptUDFManager.getNumberOfArgsByFunctionName(f.name) match { + case None => + val msg = s"no user-defined function named ${f.name}" + logger.error(msg) + return Left((400, msg)) - case LearningMachineType.Recommender if update.rpcName == "update_row" => - val id = trainSpecifier.get("id") match { - case Some(id :: Nil) => id - case _ => ??? 
// TODO: throw exception + case Some(nargs) if nargs != f.children.size => + val msg = s"number of arguments is mismatched (${f.name} takes $nargs arguments, but ${f.children.size} were given)" + logger.error(msg) + return Left((400, msg)) + + case _ => + // do nothing } - new Recommender(jubaHost, jubaPort, cm, id, keys) - - case lmt => - logger.error("no matching learning machine for " + lmt) - return None - } - - // Start to process RDD - try sources.get(update.source) match { - case Some((rddProcessor, schema)) => - logger.info("UPDATE started") - val (host, port) = JubaQLProcessor.getListeningAddress - val statusUrl = "http://%s:%s/status".format(host.getHostAddress, port) - val stopFun = rddProcessor.start(rddjson => { - rddjson.mapPartitions(updater(_, statusUrl)) - })._1 - // store the function to stop processing - stopUpdateFunc = Some(() => stopFun()) - Some("UPDATE MODEL") + case _ => + val msg = "EXECUTE expects a user-defined function, not a Spark SQL builtin function" + logger.error(msg) + return Left((400, msg)) + } + withStream(dsName)(mainDataSource => { + preparedStatements.enqueue((mainDataSource, + PreparedCreateTrigger(dsName, condition, function))) + Right(StatementProcessed("CREATE TRIGGER")) + }) + + case LogStream(streamName) => + withStream(streamName)(mainDataSource => { + preparedStatements.enqueue((mainDataSource, PreparedLogStream(streamName))) + Right(StatementProcessed("LOG STREAM")) + }) + case update: Update => + val validCombination: (LearningMachineType, String) => Boolean = { + case (LearningMachineType.Anomaly, "add") => true + case (LearningMachineType.Classifier, "train") => true + case (LearningMachineType.Recommender, "update_row") => true + case _ => false + } + withStream(update.source)(mainDataSource => { + prepareJubaClient(update.modelName, update.source, update.rpcName, + validCombination) match { + case Right((modelFut, updaterFut)) => + // put the UPDATE statement in the statement queue + preparedStatements.enqueue((mainDataSource, PreparedUpdate(update.modelName, modelFut, + update.source, updaterFut))) + Right(StatementProcessed("UPDATE MODEL")) + case Left((code, msg)) => + Left((code, msg)) + } + }) + + case StartProcessing(sourceName) => + sources.get(sourceName) match { case None => - // TODO: error message - logger.error("source '%s' not found".format(update.source)) - None + val msg = "unknown data source: " + sourceName + logger.warn(msg) + Left((400, msg)) + case Some((processor, _)) if processor.state != Initialized => + val msg = "cannot start processing a data source in state " + processor.state + logger.warn(msg) + Left((400, msg)) + case Some((processor, _)) if sources.values.exists(_._1.state == Running) => + val msg = "there is already a running process, try to run STOP PROCESSING first" + logger.warn(msg) + Left((400, msg)) + case Some((processor, maybeSchema)) => + logger.info(s"setting up processing pipeline for data source '$sourceName' " + + s"with given schema $maybeSchema") + + val rddOperations: mutable.Queue[Either[(Int, String), StreamingContext => Unit]] = + preparedStatements.filter(_._1 == sourceName).map(_._2).map(stmt => { + logger.debug(s"dealing with $stmt") + stmt match { + // CREATE STREAM ... FROM SELECT ... + // => execute a select and register the result as a table + case PreparedCreateStreamFromSelect(streamName, selectPlan, _) => + logger.info(s"adding 'CREATE STREAM $streamName FROM SELECT ...'
to pipeline") + Right((ssc: StreamingContext) => { + logger.debug(s"executing 'CREATE STREAM $streamName FROM SELECT ...'") + SchemaDStream.fromSQL(ssc, sqlc, + selectPlan, Some(streamName)) + () + }) + + // CREATE STREAM ... FROM SLIDING WINDOW ... + case PreparedCreateStreamFromSlidingWindow(streamName, windowSize, + slideInterval, windowType, source, funcSpecs, outSchema, maybePostCond) => + logger.info(s"adding 'CREATE STREAM $streamName FROM SLIDING WINDOW ...' to pipeline") + val fun = (ssc: StreamingContext) => { + logger.debug(s"executing 'CREATE STREAM $streamName FROM SLIDING WINDOW ...'") + // NB. the precondition is already applied in the `source` + val inputStream = SchemaDStream.fromSQL(ssc, sqlc, source, None) + val rowStream = inputStream.dataStream + val schemaStream = inputStream.schemaStream + + // compute window stream + val windowStream = if (windowType == "tuples") { + SlidingWindow.byCount(rowStream, windowSize, slideInterval) + } else { + // the first column is the timestamp by construction + val keyedRowStream = rowStream.map(row => { + (Helpers.parseTimestamp(row.getString(0)), + Row(row.tail: _*)) + }) + // compute window stream + SlidingWindow.byTimestamp(keyedRowStream, + windowSize, slideInterval) + } + + // if we access the window stream more than once, cache it + if (funcSpecs.size > 1) { + windowStream.persist(StorageLevel.MEMORY_AND_DISK_SER) + } + // apply the i-th aggregate function on the i-th element + // of the selected row + val aggregatedStreams = funcSpecs.zipWithIndex.map{ + case (f: DoubleInputAggFun, idx) => + val doubleStream = windowStream.mapValues(rowWithKey => + (rowWithKey._1, rowWithKey._2.getDouble(idx))) + doubleStream.transform(f.aggFun _) + case (f: StringInputAggFun, idx) => + val stringStream = windowStream.mapValues(rowWithKey => + (rowWithKey._1, rowWithKey._2.getString(idx))) + stringStream.transform(f.aggFun _) + } + // merge the aggregated columns together + val firstStream = aggregatedStreams.head.mapValues(_ :: Nil) + val combinedStream = aggregatedStreams.tail + .foldLeft(firstStream)((left, right) => { + left.join(right).mapValues(lr => lr._1 :+ lr._2) + }).transform(_.sortByKey()) + // convert to Rows and add schema + val outRowStream = if (windowType == "tuples") { + combinedStream.map(keyVal => Row(keyVal._2 :_*)) + } else { + combinedStream.map(keyVal => { + val data = Helpers.formatTimestamp(keyVal._1) :: keyVal._2 + Row(data :_*) + }) + } + val outSchemaCopy = outSchema // outSchema is not serializable + val outSchemaStream = schemaStream.map(_ => outSchemaCopy) + // apply the post condition ("HAVING") if present + val filteredOutRowStream = maybePostCond.map(postCond => { + outRowStream.transform(rdd => { + val schemaRdd = sqlc.applySchema(rdd, outSchemaCopy) + schemaRdd.where(postCond) + }) + }).getOrElse(outRowStream) + SchemaDStream(sqlc, filteredOutRowStream, outSchemaStream) + .registerStreamAsTable(streamName) + () + } + Right(fun) + + // CREATE STREAM ... FROM ANALYZE ... 
+ // => run updater.analyze on each partition + case PreparedCreateStreamFromAnalyze(streamName, modelName, + modelFut, dataSourceName, analyzerFut, rpcName, newColumn) => + // wait until model is available (when Jubatus is started) or timeout + if (!modelFut.isCompleted) { + logger.debug("waiting for model %s to come up".format(modelName)) + } else { + logger.debug("model %s is already up".format(modelName)) + } + val maybeModel = Try(ScAwait.result(modelFut, 1.minute)) + maybeModel match { + case Failure(t) => + val msg = "model %s failed to start up: %s".format( + modelName, t.getMessage) + logger.error(msg) + Left((500, msg)) + + case Success(juba) => + // wait until updater is ready or timeout + Try(ScAwait.result(analyzerFut, 1.minute)) match { + case Failure(t) => + val msg = "cannot use model %s: %s".format( + modelName, t.getMessage) + logger.error(msg) + Left((500, msg)) + + case Success(updater) => + val (host, port) = JubaQLProcessor.getListeningAddress + val statusUrl = "http://%s:%s/status".format(host.getHostAddress, port) + + logger.info(s"adding 'CREATE STREAM $streamName FROM ANALYZE ...' to pipeline") + Right((ssc: StreamingContext) => { + logger.debug(s"executing 'CREATE STREAM $streamName FROM ANALYZE ...'") + SchemaDStream.fromRDDTransformation(ssc, sqlc, dataSourceName, tmpRdd => { + val rddSchema: StructType = tmpRdd.schema + val analyzeFun = UpdaterAnalyzeWrapper(rddSchema, statusUrl, + updater, rpcName) + val newSchema = StructType(rddSchema.fields :+ + StructField(newColumn.getOrElse(rpcName), + analyzeFun.dataType, nullable = false)) + val newRdd = sqlc.applySchema(tmpRdd.mapPartitionsWithIndex((idx, iter) => { + val formatter = new SimpleDateFormat("HH:mm:ss.SSS") + val hostname = InetAddress.getLocalHost().getHostName() + println("%s @ %s [%s] DEBUG analyzing model from partition %d".format( + formatter.format(new Date), hostname, Thread.currentThread().getName, idx + )) + iter + }).mapPartitions(analyzeFun.apply(_)), + newSchema) + newRdd + }, Some(streamName)) + () + }) + } + } + + case PreparedCreateTrigger(dsName, condition, expr) => + logger.info(s"adding 'CREATE TRIGGER $dsName' to pipeline") + Right((ssc: StreamingContext) => { + logger.debug(s"executing 'CREATE TRIGGER $dsName'") + SchemaDStream.fromTableName(ssc, sqlc, dsName).foreachRDD(rdd => { + val rddWithCondition = condition match { + case None => + rdd + case Some(c) => + rdd.where(c) + } + rddWithCondition.select(expr).collect() // count() does not work here. + () + }) + }) + + case PreparedLogStream(streamName) => + logger.info(s"adding 'LOG STREAM $streamName' to pipeline") + Right((ssc: StreamingContext) => { + SchemaDStream.fromTableName(ssc, sqlc, streamName).foreachRDD(rdd => { + logger.debug(s"executing 'LOG STREAM $streamName'") + val dataToPrint = rdd.take(101) + val hasMoreData = dataToPrint.size == 101 + val ellipsis = + if (hasMoreData) "\n( ... more items ...)" + else "" + println("STREAM: " + streamName + "\n" + + rdd.schema.fields.map(sf => + sf.name + " " + sf.dataType).mkString(" | ") + "\n" + + dataToPrint.take(100).map(row => row.mkString(" | ")).mkString("\n") + + ellipsis + ) + }) + () + }) + + // UPDATE MODEL ... USING ... 
+ // => run updater.apply on each partition + case PreparedUpdate(modelName, modelFut, dataSourceName, updaterFut) => + // wait until model is available (when Jubatus is started) or timeout + if (!modelFut.isCompleted) { + logger.debug("waiting for model %s to come up".format(modelName)) + } else { + logger.debug("model %s is already up".format(modelName)) + } + val maybeModel = Try(ScAwait.result(modelFut, 1.minute)) + maybeModel match { + case Failure(t) => + val msg = "model %s failed to start up: %s".format( + modelName, t.getMessage) + logger.error(msg) + Left((500, msg)) + + case Success(juba) => + // wait until updater is ready or timeout + Try(ScAwait.result(updaterFut, 1.minute)) match { + case Failure(t) => + val msg = "cannot update model %s: %s".format( + modelName, t.getMessage) + logger.error(msg) + Left((500, msg)) + + case Success(updater) => + val (host, port) = JubaQLProcessor.getListeningAddress + val statusUrl = "http://%s:%s/status".format(host.getHostAddress, port) + + logger.info(s"adding 'UPDATE MODEL $modelName ...' to pipeline") + Right((ssc: StreamingContext) => { + SchemaDStream.fromTableName(ssc, sqlc, dataSourceName).foreachRDD(tmpRdd => { + logger.debug(s"executing 'UPDATE MODEL $modelName ...'") + val rddSchema: StructType = tmpRdd.schema + val updateFun = UpdaterApplyWrapper(rddSchema, statusUrl, updater) + // NOTE: you can add sample(...) here to work only on a subset of the items + tmpRdd.mapPartitionsWithIndex((idx, iter) => { + val formatter = new SimpleDateFormat("HH:mm:ss.SSS") + val hostname = InetAddress.getLocalHost().getHostName() + println("%s @ %s [%s] DEBUG updating model with partition %d".format( + formatter.format(new Date), hostname, Thread.currentThread().getName, idx + )) + iter + }).foreachPartition(updateFun.apply) + }) + }) + } + } + + // unknown statement type + case _ => + ??? + } + }) + logger.info("pipeline setup complete (%d items)".format(rddOperations.size)) + + rddOperations.collectFirst{ case Left(errDesc) => errDesc } match { + // there was an error during pipeline setup + case Some((code, msg)) => + Left((code, msg)) + + // there was no error, but also no instructions + case None if rddOperations.isEmpty => + val msg = "there are no processing instructions" + logger.warn(msg) + Left((400, msg)) + + // there was no error + case None => + def transform: SchemaDStream => Unit = inputStream => { + inputStream.registerStreamAsTable(sourceName) + val context = inputStream.dataStream.context + rddOperations.collect{ case Right(fun) => fun }.foreach(_.apply(context)) + } + logger.info("starting HybridProcessor with created pipeline") + val stopFun = processor.startTableProcessingGeneral(transform, + maybeSchema, sourceName)._1 + stopUpdateFunc = Some(() => stopFun()) + Right(StatementProcessed("START PROCESSING")) + } } case ana: Analyze => queryAnalyze(ana) match { - case Some(toReturn) => - Some(toReturn) - case None => - logger.error("no ANALYZE result for " + ana) - None + case Left(msgWithErrCode) => + Left(msgWithErrCode) + case Right(anaResult) => + Right(AnalyzeResultWrapper(anaResult)) } + case s: Status => + val dsStatus = sources.mapValues(_._1.state.toString) + val jubaStatus = models.mapValues(_._1 match { + case dummy: LocalJubatusApplication => "OK" + case real => real.status.toString + }) + Right(StatusResponse("STATUS", dsStatus.toMap, jubaStatus.toMap)) + case s: Shutdown => // first set a flag to stop further query processing isAcceptingQueries.set(false) // NB. 
put() has different semantics - // stop stream processing - val procStats = stopUpdateFunc match { + // stop stream processing + val procStats = stopUpdateFunc match { case Some(func) => - Some(stopStreamProcessing(func)) + Some(stopStreamProcessing(func, forShutdown = true)) case _ => logger.info("apparently there was no stream processing running") None @@ -328,7 +867,7 @@ class JubaQLService(sc: SparkContext, runMode: RunMode) // ever started, independent of complete (successful or failed) or still // starting: val stoppedJubaFut: Iterable[ScFuture[Unit]] = startedJubatusInstances.map { - case (modelName, jubaFut) => + case (modelName, (jubaFut, _, _)) => logger.debug(s"scheduling shutdown for model $modelName") // If the startup failed, no need to shutdown. For all non-failed // instances (still starting or started successfully), we schedule @@ -337,52 +876,505 @@ class JubaQLService(sc: SparkContext, runMode: RunMode) } // now convert a list of futures into a future of list and wait until completion logger.info("waiting for all Jubatus instances to shut down") - ScAwait.ready(ScFuture.sequence(stoppedJubaFut), 1 minute) + ScAwait.ready(ScFuture.sequence(stoppedJubaFut), 1.minute) // send a KILL signal to us to trigger Spark and Finagle shutdown Signal.raise(new Signal("TERM")) procStats match { case Some((staticInfo, streamInfo)) => - Some("SHUTDOWN (processing time: %s ms/%s ms)".format( - staticInfo.runtime, streamInfo.runtime)) + Right(StatementProcessed("SHUTDOWN (processing time: %s ms/%s ms)".format( + staticInfo.runtime, streamInfo.runtime))) case _ => - Some("SHUTDOWN") + Right(StatementProcessed("SHUTDOWN")) } case sp: StopProcessing => stopUpdateFunc match { case Some(func) => - val (staticInfo, streamInfo) = stopStreamProcessing(func) + val (staticInfo, streamInfo) = stopStreamProcessing(func, forShutdown = false) stopUpdateFunc = None - Some("STOP PROCESSING (processing time: %s ms/%s ms)".format( - staticInfo.runtime, streamInfo.runtime)) + Right(StatementProcessed("STOP PROCESSING (processing time: %s ms/%s ms)".format( + staticInfo.runtime, streamInfo.runtime))) case _ => - logger.warn("apparently there was no stream processing running") - None + val msg = "apparently there was no stream processing running" + logger.warn(msg) + Left((400, msg)) + } + + case CreateFunction(funcName, args, returnType, lang, body) => + // TODO: write log + // TODO: pass all args + if (!lang.equalsIgnoreCase("JavaScript")) + return Left((400, "only JavaScript is supported")) + if (args.isEmpty) + return Left((400, "args should contain at least one element")) + + val argString = args.map(_._1).mkString(", ") + val funcBody = s"function $funcName($argString) { $body }" + // try to find bugs in the syntax early + try { + JavaScriptUDFManager.register(funcName, args.size, funcBody) + } catch { + case e: Throwable => + // TODO: better message + return Left((400, e.getMessage)) + } + + val validTypes = "numeric" :: "string" :: "boolean" :: Nil + args.length match { + case n if n <= 0 => + Left((400, "number of arguments must be more than zero.")) + + case _ if !validTypes.contains(returnType) => + Left((400, "bad return type")) + + // def nArgsString(nArgs: Int): String = + // (0 until nArgs).map(n => s"x$n").mkString(", ") + // + // def nParamsString(nParams: Int): String = { + // (0 until nParams).map(n => s"x$n: AnyRef").mkString(", ") + // } + // + // def caseTypeString(sqlType: String, scalaType: String, defaultValue: String, nArgs: Int): String = { + // val args = nArgsString(nArgs) + // val 
params = nParamsString(nArgs) + // s"""case "$sqlType" => + // | sqlc.registerFunction(funcName, ($params) => { + // | JavaScriptUDFManager.registerAndCall[$scalaType](funcName, + // | $nArgs, funcBody, $args).getOrElse($defaultValue) + // | })""".stripMargin + // } + // + // def caseNArgs(nArgs: Int): String = { + // val numericCase = caseTypeString("numeric", "Double", "0.0", nArgs).split("\n").map(" " + _).mkString("\n") + // val stringCase = caseTypeString("string", "String", "\"\"", nArgs).split("\n").map(" " + _).mkString("\n") + // val booleanCase = caseTypeString("boolean", "Boolean", "false", nArgs).split("\n").map(" " + _).mkString("\n") + // s"""case $nArgs => + // | returnType match { + // |$numericCase + // |$stringCase + // |$booleanCase + // | } + // | Right(StatementProcessed("CREATE FUNCTION")) + // |""".stripMargin + // } + // + // following cases are generated with the above script. + case 1 => + returnType match { + case "numeric" => + sqlc.registerFunction(funcName, (x0: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Double](funcName, + 1, funcBody, x0).getOrElse(0.0) + }) + case "string" => + sqlc.registerFunction(funcName, (x0: AnyRef) => { + JavaScriptUDFManager.registerAndCall[String](funcName, + 1, funcBody, x0).getOrElse("") + }) + case "boolean" => + sqlc.registerFunction(funcName, (x0: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Boolean](funcName, + 1, funcBody, x0).getOrElse(false) + }) + } + Right(StatementProcessed("CREATE FUNCTION")) + + case 2 => + returnType match { + case "numeric" => + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Double](funcName, + 2, funcBody, x0, x1).getOrElse(0.0) + }) + case "string" => + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef) => { + JavaScriptUDFManager.registerAndCall[String](funcName, + 2, funcBody, x0, x1).getOrElse("") + }) + case "boolean" => + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Boolean](funcName, + 2, funcBody, x0, x1).getOrElse(false) + }) + } + Right(StatementProcessed("CREATE FUNCTION")) + + case 3 => + returnType match { + case "numeric" => + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef, x2: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Double](funcName, + 3, funcBody, x0, x1, x2).getOrElse(0.0) + }) + case "string" => + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef, x2: AnyRef) => { + JavaScriptUDFManager.registerAndCall[String](funcName, + 3, funcBody, x0, x1, x2).getOrElse("") + }) + case "boolean" => + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef, x2: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Boolean](funcName, + 3, funcBody, x0, x1, x2).getOrElse(false) + }) + } + Right(StatementProcessed("CREATE FUNCTION")) + + case 4 => + returnType match { + case "numeric" => + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef, x2: AnyRef, x3: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Double](funcName, + 4, funcBody, x0, x1, x2, x3).getOrElse(0.0) + }) + case "string" => + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef, x2: AnyRef, x3: AnyRef) => { + JavaScriptUDFManager.registerAndCall[String](funcName, + 4, funcBody, x0, x1, x2, x3).getOrElse("") + }) + case "boolean" => + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef, x2: AnyRef, x3: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Boolean](funcName, + 4, funcBody, x0, x1, x2, x3).getOrElse(false) + }) + } + Right(StatementProcessed("CREATE 
FUNCTION")) + + case 5 => + returnType match { + case "numeric" => + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef, x2: AnyRef, x3: AnyRef, x4: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Double](funcName, + 5, funcBody, x0, x1, x2, x3, x4).getOrElse(0.0) + }) + case "string" => + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef, x2: AnyRef, x3: AnyRef, x4: AnyRef) => { + JavaScriptUDFManager.registerAndCall[String](funcName, + 5, funcBody, x0, x1, x2, x3, x4).getOrElse("") + }) + case "boolean" => + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef, x2: AnyRef, x3: AnyRef, x4: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Boolean](funcName, + 5, funcBody, x0, x1, x2, x3, x4).getOrElse(false) + }) + } + Right(StatementProcessed("CREATE FUNCTION")) + + case _ => + Left((400, "too many arguments")) + } + + case CreateFeatureFunction(funcName, args, lang, body) => + if (!lang.equalsIgnoreCase("JavaScript")) { + val msg = s"language $lang is not supported" + logger.warn(msg) + return Left((400, msg)) + } + if (args.isEmpty) { + val msg = s"a function shall have at least one element" + logger.warn(msg) + return Left((400, msg)) + } + + val argString = args.map(_._1).mkString(", ") + val funcBody = s"function $funcName($argString) { $body }" + // try to find bugs in the syntax early + try { + JavaScriptFeatureFunctionManager.register(funcName, args.size, funcBody) + } catch { + case e: Throwable => + val msg = f"the function has syntax error: ${e.getMessage}" + logger.warn(msg) + return Left((400, msg)) + } + + featureFunctions += (funcName -> funcBody) + Right(StatementProcessed("CREATE FEATURE FUNCTION")) + + case CreateTriggerFunction(funcName, args, lang, body) => + // TODO: write log + // TODO: pass all args + if (!lang.equalsIgnoreCase("JavaScript")) { + val msg = s"language $lang is not supported" + logger.warn(msg) + return Left((400, msg)) + } + if (args.isEmpty) { + val msg = s"a function shall have at least one element" + logger.warn(msg) + return Left((400, msg)) + } + + val argString = args.map(_._1).mkString(", ") + val funcBody = s"function $funcName($argString) { $body }" + // try to find bugs in the syntax early + try { + JavaScriptUDFManager.register(funcName, args.size, funcBody) + } catch { + case e: Throwable => + val msg = f"the function has syntax error: ${e.getMessage}" + logger.warn(msg) + return Left((400, msg)) + } + + args.length match { + case 1 => + // Returns an Int value because registerFunction does not accept a function which returns Unit. + // The Int value is not used. + sqlc.registerFunction(funcName, (x0: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Int](funcName, + 1, funcBody, x0).getOrElse(0) + }) + Right(StatementProcessed("CREATE TRIGGER FUNCTION")) + + case 2 => + // Returns Int for the above reason. + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Int](funcName, + 2, funcBody, x0, x1).getOrElse(0) + }) + Right(StatementProcessed("CREATE TRIGGER FUNCTION")) + + case 3 => + // Returns Int for the above reason. + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef, x2: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Int](funcName, + 3, funcBody, x0, x1, x2).getOrElse(0) + }) + Right(StatementProcessed("CREATE TRIGGER FUNCTION")) + + case 4 => + // Returns Int for the above reason. 
+ sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef, x2: AnyRef, x3: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Int](funcName, + 4, funcBody, x0, x1, x2, x3).getOrElse(0) + }) + Right(StatementProcessed("CREATE TRIGGER FUNCTION")) + + case 5 => + // Returns Int for the above reason. + sqlc.registerFunction(funcName, (x0: AnyRef, x1: AnyRef, x2: AnyRef, x3: AnyRef, x4: AnyRef) => { + JavaScriptUDFManager.registerAndCall[Int](funcName, + 5, funcBody, x0, x1, x2, x3, x4).getOrElse(0) + }) + Right(StatementProcessed("CREATE TRIGGER FUNCTION")) + + case _ => + val msg = "too many arguments" + logger.warn(msg) + Left((400, msg)) } case other => - logger.error("no handler for " + other) - None + val msg = "no handler for " + other + logger.error(msg) + Left((500, msg)) + } + } + + // collect all tables referenced in a statement + protected def collectAllChildren(plan: LogicalPlan): Seq[String] = plan match { + case un: UnaryNode => + collectAllChildren(un.child) + case bn: BinaryNode => + bn.children.flatMap(collectAllChildren) + case UnresolvedRelation(tableIdentifier, _) => + tableIdentifier + case other => + Nil + } + + protected def prepareJubaClient(modelName: String, sourceName: String, rpcName: String, + validCombination: (LearningMachineType, String) => Boolean): + Either[(Int, String), (ScFuture[JubatusYarnApplication], ScFuture[JubatusClient])] = { + // check if the specified model exists (or at least, was started) + startedJubatusInstances.get(modelName) match { + // no such model was defined before + case None => + val msg = "no model called '%s'".format(modelName) + logger.info(msg) + Left((400, msg)) + + // a model was defined before + case Some((jubaFut, cm, jubaType)) => + jubaFut.value match { + // complete, but with failure + case Some(Failure(t)) => + val msg = "model %s failed to start up".format(modelName) + logger.error(msg) + Left((500, msg)) + + // not yet complete (but started) or succeeded + case _ => + // check if the specified stream exists + if (knownStreamNames.contains(sourceName)) { + // we prepare an instance of Update that only needs host and port + // of the proxy when Jubatus is ready + val almostAnUpdater: Try[(String, Int) => JubatusClient] = Try({ + // set up a (host, port) => Updater function or throw an exception + jubaType match { + case lmt@LearningMachineType.Anomaly + if validCombination(lmt, rpcName) => + (jubaHost, jubaPort) => + new Anomaly(jubaHost, jubaPort, cm, featureFunctions) + + case lmt@LearningMachineType.Classifier + if validCombination(lmt, rpcName) => + val label = cm.labelOrId match { + case Some(("label", value)) => + value + case _ => + val msg = "no label for datum specified" + throw new IllegalArgumentException(msg) + } + (jubaHost, jubaPort) => + new Classifier(jubaHost, jubaPort, cm, featureFunctions, label) + + case lmt@LearningMachineType.Recommender + if validCombination(lmt, rpcName) => + val id = cm.labelOrId match { + case Some(("id", value)) => + value + case _ => + val msg = "no id for datum specified" + throw new IllegalArgumentException(msg) + } + (jubaHost, jubaPort) => + new Recommender(jubaHost, jubaPort, cm, featureFunctions, id) + + case otherAlgorithm => + val msg = "'%s' is not a valid method for %s".format( + rpcName, otherAlgorithm + ) + logger.warn(msg) + throw new IllegalArgumentException(msg) + } + }) + // if that was successful, schedule Updater creation when + // Jubatus is ready + almostAnUpdater match { + case Success(jubaCreator) => + val updaterFut: ScFuture[JubatusClient] = 
jubaFut.map(model => { + val jubaHost = model.jubatusProxy.hostAddress + val jubaPort = model.jubatusProxy.port + jubaCreator(jubaHost, jubaPort) + }) + // return the futures of Jubatus and Updater + Right((jubaFut, updaterFut)) + case Failure(t) => + t match { + case _: IllegalArgumentException => + logger.warn(t.getMessage) + Left((400, t.getMessage)) + case _ => + val msg = "unable to create Updater: " + t.getMessage + logger.warn(msg) + Left((500, msg)) + } + + } + } else { + val msg = "source '%s' not found".format(sourceName) + logger.error(msg) + Left((400, msg)) + } + } + } + } + + protected def acceptsMoreStatements(dataSourceName: String): Boolean = { + sources.get(dataSourceName).map(_._1.state == Initialized).getOrElse(false) + } + + /** + * Run a function after ensuring the referenced stream exists and comes from a + * valid data source. + */ + protected def withStream(inputStreamName: String)(handler: String => + Either[(Int, String), JubaQLResponse]): Either[(Int, String), JubaQLResponse] = { + knownStreamNames.get(inputStreamName) match { + case Some(inputDataSourceName) => + sources.get(inputDataSourceName) match { + case Some((inputDataSource, _)) if inputDataSource.state == Initialized => + handler(inputDataSourceName) + case Some(_) => + val msg = s"data source '$inputDataSourceName' cannot accept further statements" + logger.warn(msg) + Left((400, msg)) + case None => + val msg = "data source with name '%s' does not exist".format(inputDataSourceName) + logger.error(msg) + Left((500, msg)) + } + case None => + val msg = "source '%s' not found".format(inputStreamName) + logger.error(msg) + Left((400, msg)) } } - protected def stopStreamProcessing(stopFun: () => (ProcessingInformation, ProcessingInformation)): - (ProcessingInformation, ProcessingInformation) = { + /** + * Run a function after ensuring all referenced streams exist and come from the + * same valid data source. + */ + protected def withStreams(inputStreamNames: Seq[String])(handler: String => + Either[(Int, String), JubaQLResponse]): Either[(Int, String), JubaQLResponse] = { + // look up which data source each stream comes from + val refDataSources = inputStreamNames.flatMap(knownStreamNames.get(_)).toSet + + // check if there are referenced streams that we don't know + (inputStreamNames.filter(!knownStreamNames.contains(_)), refDataSources.toList) match { + // all referenced streams are known and they come from just one data source + case (Nil, mainDataSource :: Nil) if acceptsMoreStatements(mainDataSource) => + handler(mainDataSource) + // data source is not in the correct state + case (Nil, mainDataSource :: Nil) => + val msg = s"data source '$mainDataSource' cannot accept further statements" + logger.warn(msg) + Left((400, msg)) + // all referenced streams are known, but they reference multiple data sources + case (Nil, other) => + val msg = "you cannot use streams from multiple different data sources in one statement" + logger.warn(msg) + Left((400, msg)) + // some referenced streams have not been seen before + case (unknownStreams, _) => + val msg = "unknown streams: %s".format(unknownStreams.mkString(", ")) + logger.warn(msg) + Left((400, msg)) + } + } + + protected def stopStreamProcessing(stopFun: () => (ProcessingInformation, ProcessingInformation), + forShutdown: Boolean): + (ProcessingInformation, ProcessingInformation) = { logger.info("stopping stream processing") // tell executors they should stop their processing - executorsShouldFinishProcessing.set(true) // NB. 
put() has different semantics + if (forShutdown) { + driverStatusMessage.set("shutdown") // NB. put() has different semantics + } else { + driverStatusMessage.set("stop-and-poll") // NB. put() has different semantics + } // the following call will block until processing is done completely val (staticInfo, streamInfo) = stopFun() logger.info("shut down successfully; processed %s/%s items".format( staticInfo.itemCount, streamInfo.itemCount )) + // if we are not executing a SHUTDOWN command, but a STOP PROCESSING + // command, we must reset state so that we can continue processing later + if (!forShutdown) { + driverStatusMessage.set("running") // NB. put() has different semantics + } (staticInfo, streamInfo) } protected def shutdownJubatus(modelName: String, app: JubatusYarnApplication) = { logger.info(s"shutting down model: $modelName") try { - app.stop() + // We have to wait here for the stop() call to complete. If we don't block + // until it is done, the main application may exit and kill this thread + // (this function is actually called from a future.map()) before Jubatus + // is stopped completely. + ScAwait.ready(app.stop(), 1 minute) logger.info(s"model $modelName shut down successfully") } catch { case e: Throwable => @@ -390,37 +1382,8 @@ class JubaQLService(sc: SparkContext, runMode: RunMode) } } - protected def extractDatum(keys: List[String], data: String): Datum = { - extractDatum(keys, JsonMethods.parse(data)) - } - - protected def extractDatum(keys: List[String], jvalue: JValue): Datum = { - // filter unused filed - val filtered = jvalue.filterField { - case JField(key, _) => keys.indexOf(key) >= 0 - case _ => false - } - - var datum = new Datum - filtered.foreach({ - j => - val key = j._1 - j._2 match { - case JInt(v) => - datum.addNumber(key, v.toDouble) - case JDouble(v) => - datum.addNumber(key, v) - case JString(v) => - datum.addString(key, v) - case _ => - } - j - }) - return datum - } - - - protected def queryAnalyze(ana: Analyze): Option[String] = { + protected def queryAnalyze(ana: Analyze): Either[(Int, String), AnalyzeResult] = { + // TODO remove duplicated functionality with JubatusClient def datumToJson(datum: Datum): DatumResult = { DatumResult( datum.getStringValues().asScala.map(v => (v.key, v.value)).toMap, @@ -428,96 +1391,83 @@ class JubaQLService(sc: SparkContext, runMode: RunMode) ) } models.get(ana.modelName) match { - case Some((s, cm, LearningMachineType.Anomaly)) if ana.rpcName == "calc_score" => - val host = s.jubatusProxy.hostAddress - val port = s.jubatusProxy.port - val keys = cm.specifier.toMap.get("datum") match { - case Some(list) if list.nonEmpty => list - case _ => ??? // TODO: throw exception. datum not specified - } - var datum = extractDatum(keys, ana.data) - val anomaly = new AnomalyClient(host, port, ana.modelName, 5) - try { - val score = AnomalyScore(anomaly.calcScore(datum)) - implicit val formats = DefaultFormats - return Some(Serialization.write(score)) - } finally { - anomaly.getClient.close() - } + case Some((jubaApp, createModelStmt, machineType)) => + val host = jubaApp.jubatusProxy.hostAddress + val port = jubaApp.jubatusProxy.port - case Some((s, cm, LearningMachineType.Classifier)) if ana.rpcName == "classify" => - val host = s.jubatusProxy.hostAddress - val port = s.jubatusProxy.port - val keys = cm.specifier.toMap.get("datum") match { - case Some(list) if list.nonEmpty => list - case _ => ??? // TODO: throw exception. 
datum not specified - } - var datum = extractDatum(keys, ana.data) - val data = new java.util.LinkedList[Datum]() - data.add(datum) - val classifier = new ClassifierClient(host, port, ana.modelName, 5) - try { - val res = classifier.classify(data) - if (res.size() >= 1) { - // return in json format - val retValue = ClassifierResult(res.get(0).asScala.map({ - f => ClassifierPrediction(f.label, f.score) - }).toList) - implicit val formats = DefaultFormats - return Some(Serialization.write(retValue)) - } else { - // TODO: return error in json - } - } finally { - classifier.getClient().close() - } - case Some((s, cm, LearningMachineType.Recommender)) if (ana.rpcName == "complete_row_from_id" || - ana.rpcName == "complete_row_from_datum") => - val host = s.jubatusProxy.hostAddress - val port = s.jubatusProxy.port - ana.rpcName match { - case "complete_row_from_id" => + machineType match { + case LearningMachineType.Anomaly if ana.rpcName == "calc_score" => + val datum = DatumExtractor.extract(createModelStmt, ana.data, featureFunctions, logger) + val anomaly = new AnomalyClient(host, port, ana.modelName, 5) + try { + Right(AnomalyScore(anomaly.calcScore(datum))) + } finally { + anomaly.getClient.close() + } + + case LearningMachineType.Classifier if ana.rpcName == "classify" => + val datum = DatumExtractor.extract(createModelStmt, ana.data, featureFunctions, logger) + val data = new java.util.LinkedList[Datum]() + data.add(datum) + val classifier = new ClassifierClient(host, port, ana.modelName, 5) + try { + val res = classifier.classify(data) + if (res.size() >= 1) { + // return in json format + val retValue = ClassifierResult(res.get(0).asScala.map({ + f => ClassifierPrediction(f.label, f.score) + }).toList) + Right(retValue) + } else { + val msg = "got an empty result from classifier" + logger.error(msg) + Left((500, msg)) + } + } finally { + classifier.getClient.close() + } + + case LearningMachineType.Recommender if ana.rpcName == "complete_row_from_id" => val recommender = new RecommenderClient(host, port, ana.modelName, 5) try { val retDatum = datumToJson(recommender.completeRowFromId(ana.data)) - - implicit val formats = DefaultFormats - return Some(Serialization.write(retDatum)) + Right(retDatum) } finally { recommender.getClient().close() } - case "complete_row_from_datum" => - val keys = cm.specifier.toMap.get("datum") match { - case Some(list) if list.nonEmpty => list - case _ => ??? // TODO: throw exception. 
datum not specified - } - var datum = extractDatum(keys, ana.data) + case LearningMachineType.Recommender if ana.rpcName == "complete_row_from_datum" => + val datum = DatumExtractor.extract(createModelStmt, ana.data, featureFunctions, logger) val recommender = new RecommenderClient(host, port, ana.modelName, 5) - try { val retDatum = datumToJson(recommender.completeRowFromDatum(datum)) - - implicit val formats = DefaultFormats - return Some(Serialization.write(retDatum)) + Right(retDatum) } finally { - recommender.getClient().close() + recommender.getClient.close() } + case _ => + val msg = "cannot use model '%s' with method '%s'".format(ana.modelName, ana.rpcName) + logger.warn(msg) + Left((400, msg)) } - case _ => - // error - None + + case None => + val msg = "model '%s' does not exist".format(ana.modelName) + logger.warn(msg) + Left((400, msg)) } - None } } sealed trait RunMode object RunMode { + case class Production(zookeeper: List[(String, Int)]) extends RunMode + case object Development extends RunMode + } object LocalJubatusApplication extends LazyLogging { @@ -550,7 +1500,8 @@ object LocalJubatusApplication extends LazyLogging { val namedPipe = new java.io.File(namedPipePath) try { - val jubatusProcess = runtime.exec(s"$jubaCmdName -f $namedPipePath") + val rpcPort = findAvailablePort() + val jubatusProcess = runtime.exec(s"$jubaCmdName -p $rpcPort -f $namedPipePath") handleSubProcessOutput(jubatusProcess.getInputStream, System.out, jubaCmdName) handleSubProcessOutput(jubatusProcess.getErrorStream, System.err, jubaCmdName) val namedPipeWriter = new java.io.PrintWriter(namedPipe) @@ -560,7 +1511,8 @@ object LocalJubatusApplication extends LazyLogging { namedPipeWriter.close() } - new LocalJubatusApplication(jubatusProcess, aLearningMachineName, jubaCmdName) + new LocalJubatusApplication(jubatusProcess, aLearningMachineName, jubaCmdName, + rpcPort) } finally { namedPipe.delete() } @@ -595,11 +1547,27 @@ object LocalJubatusApplication extends LazyLogging { thread.setDaemon(true) thread.start() } + + protected def findAvailablePort(): Int = { + // connect to ports until we fail to connect to one + Stream.from(9199).filter(port => { + try { + val socket = new java.net.Socket("127.0.0.1", port) + socket.close() + false + } catch { + case e: java.net.ConnectException => + true + case e: Throwable => + false + } + }).head + } } // LocalJubatusApplication is not a JubatusYarnApplication, but extends JubatusYarnApplication for implementation. -class LocalJubatusApplication(jubatus: Process, name: String, jubaCmdName: String) - extends JubatusYarnApplication(Location(InetAddress.getLocalHost, 9199), List(), null) { +class LocalJubatusApplication(jubatus: Process, name: String, jubaCmdName: String, port: Int = 9199) + extends JubatusYarnApplication(Location(InetAddress.getLocalHost, port), List(), null) { override def status: JubatusYarnApplicationStatus = { throw new NotImplementedError("status is not implemented") diff --git a/processor/src/main/scala/us/jubat/jubaql_server/processor/PreparedJubaQLStatement.scala b/processor/src/main/scala/us/jubat/jubaql_server/processor/PreparedJubaQLStatement.scala new file mode 100644 index 0000000..2685735 --- /dev/null +++ b/processor/src/main/scala/us/jubat/jubaql_server/processor/PreparedJubaQLStatement.scala @@ -0,0 +1,60 @@ +// Jubatus: Online machine learning framework for distributed environment +// Copyright (C) 2014-2015 Preferred Networks and Nippon Telegraph and Telephone Corporation. 
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License version 2.1 as published by the Free Software Foundation.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+package us.jubat.jubaql_server.processor
+
+import org.apache.spark.sql.DataType
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.types.StructType
+import scala.concurrent.Future
+import us.jubat.yarn.client.JubatusYarnApplication
+import us.jubat.jubaql_server.processor.updater.JubatusClient
+
+sealed trait PreparedJubaQLStatement
+
+case class PreparedUpdate(modelName: String,
+                          modelFut: Future[JubatusYarnApplication],
+                          dataSourceName: String,
+                          updaterFut: Future[JubatusClient]) extends PreparedJubaQLStatement
+
+case class PreparedCreateStreamFromSelect(streamName: String,
+                                          selectPlan: LogicalPlan,
+                                          usedTables: List[String]) extends PreparedJubaQLStatement {
+  override def toString: String = {
+    "PreparedCreateStreamFromSelect(%s,