Skip to content

Introduce GeoSpatial Index To CouchDB

Peng Hui Jiang edited this page Oct 16, 2017 · 4 revisions

NOTE: These instructions may be a bit out of date with regard to some of the file formats. Most likely the rebar.config changes will need to be updated. If you run through these steps and find differences from Apache CouchDB's master, please create an issue on this repository with any updates.

Let us describe the experience of introducing GeoSpatial Index to CouchDB.

Download and configure CouchDB

a. Download mirror of Apache CouchDB

git clone git@github.com:apache/couchdb.git

b. Modify rebar.config.script so that the hastings and easton dependency repositories are fetched

DepDescs = [
...
{easton,           "easton",           "bc03c71de798be1f0e4b3c74da0eaeb7a2938c78"},
...
{hastings ,        "hastings",         "1cca585bc63c3c246b674fca33a661b083e9f6f7"},
...
],

MakeDep = fun
    ({AppName, {url, Url}, Version}) ->
        {AppName, ".*", {git, Url, Version}};
    ({AppName, {url, Url}, Version, Options}) ->
        {AppName, ".*", {git, Url, Version}, Options};
    ({AppName, RepoName, Version}) when AppName == hastings; AppName == easton ->
        Url = "git@github.com:cloudant-labs/" ++ RepoName ++ ".git",
        {AppName, ".*", {git, Url, Version}};
    ({AppName, RepoName, Version}) ->
        Url = BaseUrl ++ "couchdb-" ++ RepoName ++ ".git",
        {AppName, ".*", {git, Url, Version}};
    ({AppName, RepoName, Version, Options}) ->
        Url = BaseUrl ++ "couchdb-" ++ RepoName ++ ".git",
        {AppName, ".*", {git, Url, Version}, Options}

c. Configure CouchDB and its dependencies

./configure --disable-fauxton --disable-docs

Integrate hastings into CouchDB

Add hastings to application list to be started

a. add hastings_epi plugin to couch_epi.config under rel/apps/ directory

{plugins, [
    couch_db_epi,
    chttpd_epi,
    couch_index_epi,
    global_changes_epi,
    hastings_epi,
    mango_epi,
    mem3_epi,
    setup_epi
]}.

b. Add hastings to reltool.config under rel directory

{sys, [

    {rel, "couchdb", "2.0.0a", [
        ...
        global_changes,
        hastings,
        ibrowse,
        ...
    ]},
    %% couchdb
  
    {app, global_changes, [{incl_cond, include}]},
    {app, hastings, [{incl_cond, include}]},
    {app, ibrowse, [{incl_cond, include}]},
     
 ]}
     

Enable spatial.js for hastings

a. Create spatial.js under share/server directory using below content

// Spatial: query-server module that collects the geometries a design
// document's index function emits through st_index() and reports them,
// one result list per registered function, for each document.
// NOTE(review): `for each (... in ...)` and `toSource()` below are
// SpiderMonkey-specific extensions; this file will not parse on other
// JavaScript engines.
var Spatial = (function() {

  var index_results = []; // holds temporary emitted values during index

  // Handles an exception raised while running an index function on `doc`.
  // "fatal_error" is converted into a map_runtime_error triple and thrown;
  // an array whose first element is "fatal" is rethrown unchanged; anything
  // else is only logged (with the document id when `doc` is given).
  function handleIndexError(err, doc) {
    if (err == "fatal_error") {
      throw(["error", "map_runtime_error", "function raised 'fatal_error'"]);
    } else if (err[0] == "fatal") {
      throw(err);
    }
    var message = "function raised exception " + err.toSource();
    if (doc) message += " with doc._id " + doc._id;
    log(message);
  };

  return {
    // Records one emitted [value, options] pair for the function currently
    // being run; `options` defaults to an empty object when omitted.
    index: function(value, options) {
        index_results.push([value, options || {}]);
    },

    // Runs every function in State.funs against `doc` and prints a JSON
    // array with one entry per function: the pairs it emitted, or an empty
    // array if it raised a non-fatal exception.
    indexDoc: function(doc) {
      // Seal the document before handing it to the index functions.
      Couch.recursivelySeal(doc);
      var buf = [];
      for each (fun in State.funs) {
        // Start from a clean slate so results do not leak between functions.
        index_results = [];
        try {
          fun(doc);
          buf.push(index_results);
        } catch (err) {
          // Fatal errors are rethrown by handleIndexError; otherwise the
          // failure is logged and this function contributes no results.
          handleIndexError(err, doc);
          buf.push([]);
        }
      }
      print(JSON.stringify(buf));
    }

  }
})();

b. add sandbox.st_index = Spatial.index; to loop.js under share/server directory

// Builds the sandbox object whose properties are the helper functions made
// available to user-supplied code (emit, sum, log, JSON helpers, MIME and
// render helpers, isArray and the spatial st_index).
// Returns the populated sandbox, or an empty object if creating or
// populating it throws.
function create_sandbox() {
  try {
    // if possible, use evalcx (not always available)
    var sandbox = evalcx('');
    sandbox.emit = Views.emit;
    sandbox.sum = Views.sum;
    sandbox.log = log;
    sandbox.toJSON = JSON.stringify;
    sandbox.JSON = JSON;
    sandbox.provides = Mime.provides;
    sandbox.registerType = Mime.registerType;
    sandbox.start = Render.start;
    sandbox.send = Render.send;
    sandbox.getRow = Render.getRow;
    sandbox.isArray = isArray;
    // Line added for hastings: exposes Spatial.index to index functions
    // under the name st_index.
    sandbox.st_index = Spatial.index;
  } catch (e) {
    // Fallback when the try block throws: a plain object with none of the
    // helpers above attached.
    var sandbox = {};
  }
  return sandbox;
};

c. Add the "st_index_doc": Spatial.indexDoc entry to the dispatch table in loop.js under the share/server directory

var Loop = function() {  
  var line, cmd, cmdkey, dispatch = {  
    "ddoc"     : DDoc.ddoc,  
    // "view"    : Views.handler,  
    "reset"    : State.reset,  
    "add_fun"  : State.addFun,  
    "add_lib"  : State.addLib,  
    "map_doc"  : Views.mapDoc,  
    "reduce"   : Views.reduce,  
    "rereduce" : Views.rereduce,  
    "st_index_doc": Spatial.indexDoc  
  };

d. Add share/server/spatial.js to build_js.escript under the support directory

main([]) ->
    JsFiles = ["share/server/json2.js",
               "share/server/filter.js",
               "share/server/mimeparse.js",
               "share/server/render.js",
               "share/server/state.js",
               "share/server/util.js",
               "share/server/validate.js",
               "share/server/views.js",
               "share/server/spatial.js",
               "share/server/loop.js"],

    CoffeeFiles = ["share/server/json2.js",
                   "share/server/filter.js",
                   "share/server/mimeparse.js",
                   "share/server/render.js",
                   "share/server/state.js",
                   "share/server/util.js",
                   "share/server/validate.js",
                   "share/server/views.js",
                   "share/server/spatial.js",
                   "share/server/coffee-script.js",
                   "share/server/loop.js"],
                   

Change on hastings to be compatible with CouchDB

a. in src/hastings_index_updater.erl delete the line that says {user, cloudant_util:customer_name(Db)},.

Enable Geospatial search

Add the following entries to default.ini under couchdb/rel/overlay/etc

[hastings]
enabled = true

Enjoy GeoSpatial Index in CouchDB

After building CouchDB with hastings enabled, you can add documents containing geo information to a database and run geospatial searches against them.

jiangphs-mbp:geotest jiangph$ curl -u foo:bar -X GET 'http://localhost:15984/reltest1/_design/geodd/_geo/geoidx?bbox=-180,-90,180,90&limit=2' | python -m json.tool
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   301  100   301    0     0     97      0  0:00:03  0:00:03 --:--:--    97
{
    "bookmark": "g2wAAAABaANkAA9ub2RlMkAxMjcuMC4wLjFsAAAAAmIgAAAAYj____9qaAJtAAAAIDc5ZjE0YjY0YzU3NDYxNTg0YjE1MjEyM2UzOGE2NDQ5RkBUsmENOjDEag",
    "rows": [
        {
            "geometry": {
                "coordinates": [
                    -71.13687953,
                    42.34690635
                ],
                "type": "Point"
            },
            "id": "79f14b64c57461584b152123e38a6449",
            "rev": "1-13051c832df60ec3e9869cdc801a2104"
        }
    ]
}

Troubleshooting

Case 1: noproc error when triggering Geo indexing

{"error":"{noproc,\n    {gen_server,call,\n        [hastings_index_manager,\n         {get_index,<<\"shards/e0000000-ffffffff/reltest.1477017301\">>,\n             {h_idx,undefined,undefined,<<\"_design/geodd\">>,<<\"geoidx\">>,\n                 <<\"function(doc) {if (doc.geometry && doc.geometry.coordinates) {st_index(doc.geometry);}}\">>,\n                 <<\"javascript\">>,<<\"rtree\">>,2,4326,0,\n                 <<\"e66df316792ab411705e2741bba44371\">>}},\n         infinity]}}","reason":"[{gen_server,call,3,[{file,\"gen_server.erl\"},{line,212}]},\n {hastings_rpc,get_index_pid,3,[{file,\"src/hastings_rpc.erl\"},{line,64}]},\n {hastings_rpc,search,4,[{file,\"src/hastings_rpc.erl\"},{line,22}]},\n {rexi_server,init_p,3,[{file,\"src/rexi_server.erl\"},{line,139}]}]"}

Solution: Check the log file to see whether the hastings application has been started, i.e. whether there is an entry recording its startup (as in the excerpt below). If there is not, review the section "Add hastings to application list to be started".

[info] 2016-10-21T10:01:51.919228Z node1@127.0.0.1 <0.7.0> -------- Application couch_replicator started on node 'node1@127.0.0.1'
[info] 2016-10-21T10:01:51.919320Z node1@127.0.0.1 <0.7.0> -------- Application bear started on node 'node1@127.0.0.1'
[info] 2016-10-21T10:01:51.919397Z node1@127.0.0.1 <0.7.0> -------- Application easton started on node 'node1@127.0.0.1'
[info] 2016-10-21T10:01:51.921395Z node1@127.0.0.1 <0.7.0> -------- Application hastings started on node 'node1@127.0.0.1'
[info] 2016-10-21T10:01:51.922600Z node1@127.0.0.1 <0.7.0> -------- Application global_changes started on node 'node1@127.0.0.1'
[info] 2016-10-21T10:01:51.922632Z node1@127.0.0.1 <0.7.0> -------- Application couch_plugins started on node 'node1@127.0.0.1'
[info] 2016-10-21T10:01:51.955120Z node1@127.0.0.1 <0.7.0> -------- Application runtime_tools started on node 'node1@127.0.0.1'
[info] 2016-10-21T10:01:51.955571Z node1@127.0.0.1 <0.7.0> -------- Application ddoc_cache started on node 'node1@127.0.0.1'
[info] 2016-10-21T10:01:51.955855Z node1@127.0.0.1 <0.7.0> -------- Application couch_index started on node 'node1@127.0.0.1'

Case 2: unknown_command error when triggering Geo indexing

{
    "error": "{nocatch,{unknown_command,<<\"unknown command 'st_index_doc'\">>}}",
    "reason": "[{couch_os_process,prompt,2,[{file,\"src/couch_os_process.erl\"},{line,59}]},\n {couch_query_servers,proc_prompt,2,\n                      [{file,\"src/couch_query_servers.erl\"},{line,427}]},\n {hastings_index_updater,load_docs,2,\n                         [{file,\"src/hastings_index_updater.erl\"},{line,100}]},\n {couch_db,conv_to_full_doc_info,2,[{file,\"src/couch_db.erl\"},{line,1533}]},\n {couch_bt_engine,drop_reductions,4,\n                  [{file,\"src/couch_bt_engine.erl\"},{line,828}]},\n {couch_btree,stream_kv_node2,8,[{file,\"src/couch_btree.erl\"},{line,783}]},\n {couch_btree,fold,4,[{file,\"src/couch_btree.erl\"},{line,220}]},\n {couch_bt_engine,fold_changes,5,\n                  [{file,\"src/couch_bt_engine.erl\"},{line,431}]}]"
}

Solution: Review the section "Enable spatial.js for hastings" and verify that every step has been completed; this error means the query server does not know the st_index_doc command.