From df8e77056484e852ab8600263d1951767356e27b Mon Sep 17 00:00:00 2001 From: facebook-github-bot Date: Wed, 10 Jan 2024 23:42:46 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20=20@=203efec?= =?UTF-8?q?fb00d785aa03f9d907b7b1d5085fa2fb1e6=20=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 404.html | 4 ++-- _src/write.md | 9 --------- assets/js/ca95d5ad.11f5a106.js | 1 + assets/js/ca95d5ad.fffa9184.js | 1 - ...runtime~main.67e379df.js => runtime~main.9a2f0b03.js} | 2 +- blog/archive/index.html | 4 ++-- blog/incremental/index.html | 4 ++-- blog/index.html | 4 ++-- blog/tags/glean/index.html | 4 ++-- blog/tags/incremental/index.html | 4 ++-- blog/tags/index.html | 4 ++-- docs/angle/advanced/index.html | 4 ++-- docs/angle/debugging/index.html | 4 ++-- docs/angle/efficiency/index.html | 4 ++-- docs/angle/guide/index.html | 4 ++-- docs/angle/intro/index.html | 4 ++-- docs/angle/reference/index.html | 4 ++-- docs/angle/style/index.html | 4 ++-- docs/building/index.html | 4 ++-- docs/cli/index.html | 4 ++-- docs/databases/index.html | 4 ++-- docs/derived/index.html | 4 ++-- docs/implementation/incrementality/index.html | 4 ++-- docs/indexer/cxx/index.html | 4 ++-- docs/indexer/flow/index.html | 4 ++-- docs/indexer/hack/index.html | 4 ++-- docs/indexer/haskell/index.html | 4 ++-- docs/indexer/intro/index.html | 4 ++-- docs/indexer/lsif-go/index.html | 4 ++-- docs/indexer/lsif-java/index.html | 4 ++-- docs/indexer/lsif-rust/index.html | 4 ++-- docs/indexer/lsif-typescript/index.html | 4 ++-- docs/indexer/scip-dotnet/index.html | 4 ++-- docs/indexer/scip-python/index.html | 4 ++-- docs/introduction/index.html | 4 ++-- docs/query/api/haskell/index.html | 4 ++-- docs/query/haskell/index.html | 4 ++-- docs/query/intro/index.html | 4 ++-- docs/running/index.html | 4 ++-- docs/schema/all/index.html | 4 ++-- docs/schema/basic/index.html | 4 ++-- docs/schema/changing/index.html | 4 ++-- 
docs/schema/design/index.html | 4 ++-- docs/schema/recursion/index.html | 4 ++-- docs/schema/syntax/index.html | 4 ++-- docs/schema/thrift/index.html | 4 ++-- docs/schema/types/index.html | 4 ++-- docs/schema/workflow/index.html | 4 ++-- docs/server/index.html | 4 ++-- docs/shell/index.html | 4 ++-- docs/trying/index.html | 4 ++-- docs/walkthrough/index.html | 4 ++-- docs/write/index.html | 6 +++--- index.html | 4 ++-- 54 files changed, 103 insertions(+), 112 deletions(-) create mode 100644 assets/js/ca95d5ad.11f5a106.js delete mode 100644 assets/js/ca95d5ad.fffa9184.js rename assets/js/{runtime~main.67e379df.js => runtime~main.9a2f0b03.js} (99%) diff --git a/404.html b/404.html index d5a75108c..7795b9a24 100644 --- a/404.html +++ b/404.html @@ -5,14 +5,14 @@ Page Not Found | Glean - +
Skip to main content

Page Not Found

We could not find what you were looking for.

Please contact the owner of the site that linked you to the original URL and let them know their link is broken.

- + \ No newline at end of file diff --git a/_src/write.md b/_src/write.md index c41472ae1..33a5c9786 100644 --- a/_src/write.md +++ b/_src/write.md @@ -5,7 +5,6 @@ sidebar_label: Writing data to Glean --- import {OssOnly, FbInternalOnly} from 'internaldocs-fb-helpers'; -import Scribe from './fb/scribe.md'; import Backup from './fb/backup.md'; import {SrcFile,SrcFileLink} from '@site/utils'; @@ -36,12 +35,6 @@ A database can be created by a client using any of these methods: 2. On the command line: invoke the `glean` command-line tool to send data in JSON format, see [ Creating a database using the command line](#creating-a-database-using-the-command-line). 3. In the shell, use `glean shell --db-root=` and then use the command `:load` to create a DB from a JSON file. See [Loading a DB from JSON in the shell](#loading-a-db-from-json-in-the-shell). - - -4. Via Scribe, see [Writing data using Scribe](#writing-data-using-scribe) - - - ## Server-driven writing Large indexing jobs are coordinated by the server, using a *recipe* to @@ -250,6 +243,4 @@ glean shell --service :list This will list the DBs available on the write server. 
- - diff --git a/assets/js/ca95d5ad.11f5a106.js b/assets/js/ca95d5ad.11f5a106.js new file mode 100644 index 000000000..ac251b1ea --- /dev/null +++ b/assets/js/ca95d5ad.11f5a106.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkwebsite=self.webpackChunkwebsite||[]).push([[3345],{3905:(e,n,t)=>{t.r(n),t.d(n,{MDXContext:()=>s,MDXProvider:()=>u,mdx:()=>h,useMDXComponents:()=>m,withMDXComponents:()=>c});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(){return r=Object.assign||function(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var s=a.createContext({}),c=function(e){return function(n){var t=m(n.components);return a.createElement(e,r({},n,{components:t}))}},m=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):l(l({},n),e)),t},u=function(e){var n=m(e.components);return a.createElement(s.Provider,{value:n},e.children)},f={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},p=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,r=e.originalType,o=e.parentName,s=d(e,["components","mdxType","originalType","parentName"]),c=m(t),u=i,p=c["".concat(o,".").concat(u)]||c[u]||f[u]||r;return t?a.createElement(p,l(l({ref:n},s),{},{components:t})):a.createElement(p,l({ref:n},s))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var r=t.length,o=new Array(r);o[0]=p;var l={};for(var d in n)hasOwnProperty.call(n,d)&&(l[d]=n[d]);l.originalType=e,l.mdxType="string"==typeof e?e:i,o[1]=l;for(var s=2;s{t.d(n,{EO:()=>l,O1:()=>o,Rr:()=>d});var a=t(67294),i=t(44256);let r;function o(e){return a.createElement("a",{href:r+e.file},e.file)}function l(e){return 
a.createElement("a",{href:r+e.file},e.children)}r=(0,i.isInternal)()?"https://www.internalfb.com/code/fbsource/fbcode/":"https://github.com/facebookincubator/Glean/tree/master/";const d=e=>{let{children:n,internal:t,external:r}=e;return(0,i.fbContent)({internal:a.createElement("code",null,t),external:a.createElement("code",null,r)})}},1554:(e,n,t)=>{t.r(n),t.d(n,{assets:()=>c,contentTitle:()=>d,default:()=>h,frontMatter:()=>l,metadata:()=>s,toc:()=>m});var a=t(83117),i=(t(67294),t(3905)),r=t(44256),o=t(12038);const l={id:"write",title:"Writing data to Glean",sidebar_label:"Writing data to Glean"},d=void 0,s={unversionedId:"write",id:"write",title:"Writing data to Glean",description:"This page describes the various ways in which data gets into Glean.",source:"@site/docs/write.md",sourceDirName:".",slug:"/write",permalink:"/docs/write",draft:!1,editUrl:"https://github.com/facebookincubator/Glean/tree/main/glean/website/docs/write.md",tags:[],version:"current",frontMatter:{id:"write",title:"Writing data to Glean",sidebar_label:"Writing data to Glean"},sidebar:"someSidebar",previous:{title:"Glean Databases",permalink:"/docs/databases"},next:{title:"Running the Tools",permalink:"/docs/running"}},c={},m=[{value:"Client-driven writing",id:"client-driven-writing",level:2},{value:"Server-driven writing",id:"server-driven-writing",level:2},{value:"APIs for writing",id:"apis-for-writing",level:2},{value:"Writing from the command line",id:"writing-from-the-command-line",level:2},{value:"JSON format",id:"json-format",level:3},{value:"Loading a DB from JSON in the shell",id:"loading-a-db-from-json-in-the-shell",level:3},{value:"Creating a database using the command line",id:"creating-a-database-using-the-command-line",level:3}],u=(f="Backup",function(e){return console.warn("Component "+f+" was not imported, exported, or provided by MDXProvider as global scope"),(0,i.mdx)("div",e)});var f;const p={toc:m};function 
h(e){let{components:n,...t}=e;return(0,i.mdx)("wrapper",(0,a.Z)({},p,t,{components:n,mdxType:"MDXLayout"}),(0,i.mdx)("p",null,"This page describes the various ways in which data gets into Glean."),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("p",null,"For a complete walkthrough of the steps necessary to write an indexer, see ",(0,i.mdx)("a",{parentName:"p",href:"https://www.internalfb.com/intern/wiki/Glean/How_to:_write_a_Glean_indexer/"},"How to write a Glean indexer"),".")),(0,i.mdx)("p",null,"There are two main methods for creating a DB. Repo-wide indexing jobs\nwhich require multiple workers and have dependent tasks are managed by\nthe server, while simple one-off DB creation can be performed\nindependently by a single client."),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("p",null,"After the data is ingested by the write tier (",(0,i.mdx)("inlineCode",{parentName:"p"},"glean.write"),"), it is backed up and copied to the read tier (",(0,i.mdx)("inlineCode",{parentName:"p"},"glean"),") for efficient access. 
For newly created DB names, check out ",(0,i.mdx)("a",{parentName:"p",href:"https://www.internalfb.com/intern/wiki/Glean/Write/#configuring-db-backup-an"},"the section below")," for configuring this behavior.")),(0,i.mdx)("h2",{id:"client-driven-writing"},"Client-driven writing"),(0,i.mdx)("p",null,"A database can be created by a client using any of these methods:"),(0,i.mdx)("ol",null,(0,i.mdx)("li",{parentName:"ol"},"Programmatically, using one of the APIs listed in ",(0,i.mdx)("a",{parentName:"li",href:"#apis-for-writing"},"APIs for Writing"),"."),(0,i.mdx)("li",{parentName:"ol"},"On the command line: invoke the ",(0,i.mdx)("inlineCode",{parentName:"li"},"glean")," command-line tool to send data in JSON format, see ",(0,i.mdx)("a",{parentName:"li",href:"#creating-a-database-using-the-command-line"}," Creating a database using the command line"),"."),(0,i.mdx)("li",{parentName:"ol"},"In the shell, use ",(0,i.mdx)("inlineCode",{parentName:"li"},"glean shell --db-root=")," and then use the command ",(0,i.mdx)("inlineCode",{parentName:"li"},":load")," to create a DB from a JSON file. See ",(0,i.mdx)("a",{parentName:"li",href:"#loading-a-db-from-json-in-the-shell"},"Loading a DB from JSON in the shell"),".")),(0,i.mdx)("h2",{id:"server-driven-writing"},"Server-driven writing"),(0,i.mdx)("p",null,"Large indexing jobs are coordinated by the server, using a ",(0,i.mdx)("em",{parentName:"p"},"recipe")," to\ndefine the various tasks and the dependencies between them. Recipes\nare defined in the recipes configuration; see the ",(0,i.mdx)("inlineCode",{parentName:"p"},"--recipe-config"),"\noption in ",(0,i.mdx)("a",{parentName:"p",href:"/docs/running#common-options"},"Common options"),"."),(0,i.mdx)("p",null,"The job proceeds as follows:"),(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("p",{parentName:"li"},"An indexing job is started by calling the server's ",(0,i.mdx)("inlineCode",{parentName:"p"},"kickOff")," Thrift\nmethod. 
This creates a work queue of tasks on the server.")),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("p",{parentName:"li"},"Clients obtain tasks from the server by calling ",(0,i.mdx)("inlineCode",{parentName:"p"},"getWork"),". Tasks may\nhave dependencies between them, so the server won't hand out a task\nuntil its dependencies are complete.")),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("p",{parentName:"li"},"When all tasks are done, the server marks the database as complete."))),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("p",null,"For the fbsource indexer, the components of this are:"),(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},"The coordinator, run by a ",(0,i.mdx)("a",{parentName:"li",href:"https://www.internalfb.com/intern/chronos/job/?jobname=glean.clang.indexer&smc=chronos_gp_admin_client"},"chronos job"),", which calls ",(0,i.mdx)("inlineCode",{parentName:"li"},"kickoff")," and then waits for completion."),(0,i.mdx)("li",{parentName:"ul"},"The workers which poll the server (see ",(0,i.mdx)("a",{parentName:"li",href:"https://www.internalfb.com/intern/wiki/Glean/ClangIndexer/"},"Glean/ClangIndexer"),")"),(0,i.mdx)("li",{parentName:"ul"},"The server (see ",(0,i.mdx)("a",{parentName:"li",href:"https://www.internalfb.com/intern/wiki/Glean/Infrastructure/"},"Glean/Infrastructure"),")"))),(0,i.mdx)("h2",{id:"apis-for-writing"},"APIs for writing"),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},"The C++ writing API is the most performant. It is used by the clang-based indexer for C++ and Objective C code. 
See ",(0,i.mdx)("a",{parentName:"li",href:"https://phabricator.intern.facebook.com/diffusion/FBS/browse/master/fbcode/glean/cpp/glean.h"},"glean/cpp/glean.h")),(0,i.mdx)("li",{parentName:"ul"},"In Hack, ",(0,i.mdx)("a",{parentName:"li",href:"https://www.internalfb.com/intern/codex/symbol/php/Glean/genKickOffForHandle/"},"genKickOffForHandle")," and the various functions for writing facts."))),(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},"The Haskell API for writing",(0,i.mdx)("ul",{parentName:"li"},(0,i.mdx)("li",{parentName:"ul"},"Example: ",(0,i.mdx)(o.O1,{file:"glean/client/hs/example/ExampleWriter.hs",mdxType:"SrcFile"}))))),(0,i.mdx)("p",null,"If none of the above work for you, the Thrift API enable basic write\naccess to the database."),(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"kickOff")," can be used to create a new DB"),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"sendJsonBatch")," is for sending facts in JSON-serialized form"),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"finishBatch")," exposes the result of a previously sent JSON batch"),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"workFinished")," closes a DB")),(0,i.mdx)("p",null,"A rough outline of a client looks like:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},"glean = make_glean_thrift_client()\ndb_handle = make_uuid()\nglean.kickOff(my_repo, KickOffFill(writeHandle=db_handle))\nfor json_batch in json_batches:\n handle = glean.sendJsonBatch(json_batch)\n result = glean.finishBatch(handle)\n # handle result\nglean.workFinished(my_repo, db_handle, success_or_failure)\n")),(0,i.mdx)("h2",{id:"writing-from-the-command-line"},"Writing from the command line"),(0,i.mdx)("h3",{id:"json-format"},"JSON format"),(0,i.mdx)("p",null,"The JSON format for Glean data is described in ",(0,i.mdx)("a",{parentName:"p",href:"/docs/schema/thrift"},"Thrift 
and JSON"),"."),(0,i.mdx)("p",null,"Here's an example of JSON data for writing to Glean:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},'[\n { "predicate": "cxx1.Name.1", # define facts for cxx1.Name.1\n "facts": [\n { "id": 1, "key": "abc" }, # define a fact with id 1\n { "id": 2, "key": "def" }\n ]\n },\n { "predicate": "cxx1.FunctionName.1", # define facts for cxx1.FunctionName.1\n "facts": [\n { "id": 3,\n "key": {\n "name": { "id": 1 }}} # reference to fact with id 1\n ]\n },\n { "predicate": "cxx1.FunctionQName.1", # define facts for cxx1.FunctionQName.1\n "facts": [\n { "key": {\n "name": 3, # 3 is shorthand for { "id": 3 }\n "scope": { "global_": {} } } },\n { "key": {\n "name": {\n "key": { # define a nested fact directly\n "name": {\n "key": "ghi" }}}, # another nested fact\n "scope": {\n "namespace_": {\n "key": {\n "name": {\n "key": "std" }}}}}\n ]\n }\n]\n')),(0,i.mdx)("p",null,"The rules of the game are:"),(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},"Predicate names must include versions, i.e. ",(0,i.mdx)("inlineCode",{parentName:"li"},"cxx1.Name.1")," rather than ",(0,i.mdx)("inlineCode",{parentName:"li"},"cxx1.Name"),"."),(0,i.mdx)("li",{parentName:"ul"},"The ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," field when defining a fact is optional. 
The ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," numbers in the input file will ",(0,i.mdx)("em",{parentName:"li"},"not")," be the final ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," numbers assigned to the facts in the database."),(0,i.mdx)("li",{parentName:"ul"},"There are no restrictions on ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," values (any 64-bit integer will do) but an ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," value may not be reused within a file."),(0,i.mdx)("li",{parentName:"ul"},"Later facts may refer to earlier ones using either ",(0,i.mdx)("inlineCode",{parentName:"li"},'{ "id": N }')," or just ",(0,i.mdx)("inlineCode",{parentName:"li"},"N"),"."),(0,i.mdx)("li",{parentName:"ul"},"It is only possible to refer to ",(0,i.mdx)("inlineCode",{parentName:"li"},"id"),"s from facts in the same file, if you are writing multiple files using ",(0,i.mdx)("inlineCode",{parentName:"li"},"glean write")," or via the ",(0,i.mdx)("inlineCode",{parentName:"li"},"sendJsonBatch")," API."),(0,i.mdx)("li",{parentName:"ul"},"a nested facts can be defined inline, instead of defining it with an ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," first and then referencing it."),(0,i.mdx)("li",{parentName:"ul"},"an inline nested fact can be given an ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," and referred to later.")),(0,i.mdx)("h3",{id:"loading-a-db-from-json-in-the-shell"},"Loading a DB from JSON in the shell"),(0,i.mdx)("p",null,"The shell is useful for experimenting with creating a DB from JSON data directly. 
Let's try loading the data above into a DB in the shell:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},"$ mkdir /tmp/glean\n$ glean shell --db-root /tmp/glean\nGlean Shell, dev mode\ntype :help for help.\nno fbsource database availabe\n> :load test/0 /home/smarlow/test\nI0514 01:19:37.137109 3566745 Work.hs:184] test/16: database complete\n")),(0,i.mdx)("p",null,"Let's see what facts we loaded:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},"test> :stat\n1\n count: 72\n size: 5988\ncxx1.FunctionName.1\n count: 2\n size: 66\ncxx1.FunctionQName.1\n count: 2\n size: 70\ncxx1.Name.1\n count: 4\n size: 148\ncxx1.NamespaceQName.1\n count: 1\n size: 35\ntest>\n")),(0,i.mdx)("p",null,"Note that there were 4 ",(0,i.mdx)("inlineCode",{parentName:"p"},"cxx1.Name.1")," facts - some of those were defined as inline nested facts in the JSON. We can query them all:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},'test> cxx1.Name _\n4 results, 1 queries, 4 facts, 0.22ms, 44296 bytes\n\n{ "id": 1096, "key": "abc" }\n{ "id": 1097, "key": "def" }\n{ "id": 1100, "key": "ghi" }\n{ "id": 1102, "key": "std" }\n')),(0,i.mdx)("p",null,"Note that the ",(0,i.mdx)("inlineCode",{parentName:"p"},"id")," values here do not correspond to the ",(0,i.mdx)("inlineCode",{parentName:"p"},"id")," values in the input file."),(0,i.mdx)("h3",{id:"creating-a-database-using-the-command-line"},"Creating a database using the command line"),(0,i.mdx)("p",null,"The ",(0,i.mdx)("inlineCode",{parentName:"p"},"glean")," command-line tool can be used to create a database directly on the server."),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("p",null,"There is a default retention policy for databases created this way; for details and to discuss your requirements, talk to the Glean team before creating databases.")),(0,i.mdx)("p",null,"To create a database from a single file of JSON facts:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},"glean create 
--service --finish --db / \n")),(0,i.mdx)("p",null,"where"),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"")," can be ",(0,i.mdx)("inlineCode",{parentName:"li"},"glean.write.test")," for testing. ",(0,i.mdx)("inlineCode",{parentName:"li"},"glean.write")," is the production write tier."))),(0,i.mdx)(r.OssOnly,{mdxType:"OssOnly"},(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"")," is the ",(0,i.mdx)("inlineCode",{parentName:"li"},"host:port")," of the Glean server"))),(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"")," is the name for your DB. For indexing repositories we normally use the name of the repository, but it's just a string, so you can use whatever you want."),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"")," identifies this particular instance of your database. For repositories we normally use the revision hash, but, again, it's just a string."),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"")," the file containing the JSON facts.")),(0,i.mdx)("p",null,"If the file is more than, say, 100MB, this operation will probably time out sending the data to the server. 
To send large amounts of data you need to batch it up into multiple files, and then send it like this:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},"glean create --service --db /\nglean write --service --db / \nglean write --service --db / \n...\nglean finish --service --db /\n")),(0,i.mdx)("p",null,"To find out if your DB made it:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},"glean shell --service :list\n")),(0,i.mdx)("p",null,"This will list the DBs available on the write server."),(0,i.mdx)(u,{mdxType:"Backup"}))}h.isMDXComponent=!0},47596:function(e,n,t){var a=this&&this.__awaiter||function(e,n,t,a){return new(t||(t=Promise))((function(i,r){function o(e){try{d(a.next(e))}catch(n){r(n)}}function l(e){try{d(a.throw(e))}catch(n){r(n)}}function d(e){var n;e.done?i(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(o,l)}d((a=a.apply(e,n||[])).next())}))};Object.defineProperty(n,"__esModule",{value:!0}),n.getSpecInfo=void 0;const i=t(11737);n.getSpecInfo=function(e){return a(this,void 0,void 0,(function*(){return yield i.call({module:"bloks",api:"getSpecInfo",args:{styleId:e}})}))}},11737:function(e,n){var t=this&&this.__awaiter||function(e,n,t,a){return new(t||(t=Promise))((function(i,r){function o(e){try{d(a.next(e))}catch(n){r(n)}}function l(e){try{d(a.throw(e))}catch(n){r(n)}}function d(e){var n;e.done?i(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(o,l)}d((a=a.apply(e,n||[])).next())}))};Object.defineProperty(n,"__esModule",{value:!0}),n.call=void 0;let a=!1,i=0;const r={};n.call=function(e){return t(this,void 0,void 0,(function*(){if("staticdocs.thefacebook.com"!==window.location.hostname&&"localhost"!==window.location.hostname)return Promise.reject(new Error("Not running on static docs"));a||(a=!0,window.addEventListener("message",(e=>{if("static-docs-bridge-response"!==e.data.event)return;const n=e.data.id;n in r||console.error(`Recieved response for id: ${n} with no matching 
receiver`),"response"in e.data?r[n].resolve(e.data.response):r[n].reject(new Error(e.data.error)),delete r[n]})));const n=i++,t=new Promise(((e,t)=>{r[n]={resolve:e,reject:t}})),o={event:"static-docs-bridge-call",id:n,module:e.module,api:e.api,args:e.args},l="localhost"===window.location.hostname?"*":"https://www.internalfb.com";return window.parent.postMessage(o,l),t}))}},24855:function(e,n,t){var a=this&&this.__awaiter||function(e,n,t,a){return new(t||(t=Promise))((function(i,r){function o(e){try{d(a.next(e))}catch(n){r(n)}}function l(e){try{d(a.throw(e))}catch(n){r(n)}}function d(e){var n;e.done?i(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(o,l)}d((a=a.apply(e,n||[])).next())}))};Object.defineProperty(n,"__esModule",{value:!0}),n.reportFeatureUsage=n.reportContentCopied=void 0;const i=t(11737);n.reportContentCopied=function(e){return a(this,void 0,void 0,(function*(){const{textContent:n}=e;try{yield i.call({module:"feedback",api:"reportContentCopied",args:{textContent:n}})}catch(t){}}))},n.reportFeatureUsage=function(e){return a(this,void 0,void 0,(function*(){const{featureName:n,id:t}=e;console.log("used feature");try{yield i.call({module:"feedback",api:"reportFeatureUsage",args:{featureName:n,id:t}})}catch(a){}}))}},44256:function(e,n,t){var a=this&&this.__createBinding||(Object.create?function(e,n,t,a){void 0===a&&(a=t),Object.defineProperty(e,a,{enumerable:!0,get:function(){return n[t]}})}:function(e,n,t,a){void 0===a&&(a=t),e[a]=n[t]}),i=this&&this.__setModuleDefault||(Object.create?function(e,n){Object.defineProperty(e,"default",{enumerable:!0,value:n})}:function(e,n){e.default=n}),r=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var n={};if(null!=e)for(var t in e)"default"!==t&&Object.prototype.hasOwnProperty.call(e,t)&&a(n,e,t);return 
i(n,e),n};Object.defineProperty(n,"__esModule",{value:!0}),n.OssOnly=n.FbInternalOnly=n.getEphemeralDiffNumber=n.hasEphemeralDiffNumber=n.isInternal=n.validateFbContentArgs=n.fbInternalOnly=n.fbContent=n.inpageeditor=n.feedback=n.uidocs=n.bloks=void 0,n.bloks=r(t(47596)),n.uidocs=r(t(17483)),n.feedback=r(t(24855)),n.inpageeditor=r(t(27312));const o=["internal","external"];function l(e){return s(e),c()?"internal"in e?d(e.internal):[]:"external"in e?d(e.external):[]}function d(e){return"function"==typeof e?e():e}function s(e){if("object"!=typeof e)throw new Error(`fbContent() args must be an object containing keys: ${o}. Instead got ${e}`);if(!Object.keys(e).find((e=>o.find((n=>n===e)))))throw new Error(`No valid args found in ${JSON.stringify(e)}. Accepted keys: ${o}`);const n=Object.keys(e).filter((e=>!o.find((n=>n===e))));if(n.length>0)throw new Error(`Unexpected keys ${n} found in fbContent() args. Accepted keys: ${o}`)}function c(){try{return Boolean(!1)}catch(e){return console.log("process.env.FB_INTERNAL couldn't be read, maybe you forgot to add the required webpack EnvironmentPlugin config?",e),!1}}function m(){try{return null}catch(e){return console.log("process.env.PHABRICATOR_DIFF_NUMBER couldn't be read, maybe you forgot to add the required webpack EnvironmentPlugin config?",e),null}}n.fbContent=l,n.fbInternalOnly=function(e){return l({internal:e})},n.validateFbContentArgs=s,n.isInternal=c,n.hasEphemeralDiffNumber=function(){return Boolean(m())},n.getEphemeralDiffNumber=m,n.FbInternalOnly=function(e){return c()?e.children:null},n.OssOnly=function(e){return c()?null:e.children}},27312:function(e,n,t){var a=this&&this.__awaiter||function(e,n,t,a){return new(t||(t=Promise))((function(i,r){function o(e){try{d(a.next(e))}catch(n){r(n)}}function l(e){try{d(a.throw(e))}catch(n){r(n)}}function d(e){var n;e.done?i(e.value):(n=e.value,n instanceof t?n:new 
t((function(e){e(n)}))).then(o,l)}d((a=a.apply(e,n||[])).next())}))};Object.defineProperty(n,"__esModule",{value:!0}),n.submitDiff=void 0;const i=t(11737);n.submitDiff=function(e){return a(this,void 0,void 0,(function*(){const{file_path:n,new_content:t,project_name:a,diff_number:r}=e;try{return yield i.call({module:"inpageeditor",api:"createPhabricatorDiffApi",args:{file_path:n,new_content:t,project_name:a,diff_number:r}})}catch(o){throw new Error(`Error occurred while trying to submit diff. Stack trace: ${o}`)}}))}},17483:function(e,n,t){var a=this&&this.__awaiter||function(e,n,t,a){return new(t||(t=Promise))((function(i,r){function o(e){try{d(a.next(e))}catch(n){r(n)}}function l(e){try{d(a.throw(e))}catch(n){r(n)}}function d(e){var n;e.done?i(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(o,l)}d((a=a.apply(e,n||[])).next())}))};Object.defineProperty(n,"__esModule",{value:!0}),n.getApi=n.docsets=void 0;const i=t(11737);n.docsets={BLOKS_CORE:"887372105406659"},n.getApi=function(e){return a(this,void 0,void 0,(function*(){const{name:n,framework:t,docset:a}=e;return yield i.call({module:"uidocs",api:"getApi",args:{name:n,framework:t,docset:a}})}))}}}]); \ No newline at end of file diff --git a/assets/js/ca95d5ad.fffa9184.js b/assets/js/ca95d5ad.fffa9184.js deleted file mode 100644 index f00493ce8..000000000 --- a/assets/js/ca95d5ad.fffa9184.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkwebsite=self.webpackChunkwebsite||[]).push([[3345],{3905:(e,n,t)=>{t.r(n),t.d(n,{MDXContext:()=>s,MDXProvider:()=>u,mdx:()=>h,useMDXComponents:()=>m,withMDXComponents:()=>c});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(){return r=Object.assign||function(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var 
r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var s=a.createContext({}),c=function(e){return function(n){var t=m(n.components);return a.createElement(e,r({},n,{components:t}))}},m=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):l(l({},n),e)),t},u=function(e){var n=m(e.components);return a.createElement(s.Provider,{value:n},e.children)},f={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},p=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,r=e.originalType,o=e.parentName,s=d(e,["components","mdxType","originalType","parentName"]),c=m(t),u=i,p=c["".concat(o,".").concat(u)]||c[u]||f[u]||r;return t?a.createElement(p,l(l({ref:n},s),{},{components:t})):a.createElement(p,l({ref:n},s))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var r=t.length,o=new Array(r);o[0]=p;var l={};for(var d in n)hasOwnProperty.call(n,d)&&(l[d]=n[d]);l.originalType=e,l.mdxType="string"==typeof e?e:i,o[1]=l;for(var s=2;s{t.d(n,{EO:()=>l,O1:()=>o,Rr:()=>d});var a=t(67294),i=t(44256);let r;function o(e){return a.createElement("a",{href:r+e.file},e.file)}function l(e){return a.createElement("a",{href:r+e.file},e.children)}r=(0,i.isInternal)()?"https://www.internalfb.com/code/fbsource/fbcode/":"https://github.com/facebookincubator/Glean/tree/master/";const d=e=>{let{children:n,internal:t,external:r}=e;return(0,i.fbContent)({internal:a.createElement("code",null,t),external:a.createElement("code",null,r)})}},1554:(e,n,t)=>{t.r(n),t.d(n,{assets:()=>c,contentTitle:()=>d,default:()=>x,frontMatter:()=>l,metadata:()=>s,toc:()=>m});var a=t(83117),i=(t(67294),t(3905)),r=t(44256),o=t(12038);const l={id:"write",title:"Writing data to Glean",sidebar_label:"Writing data to Glean"},d=void 0,s={unversionedId:"write",id:"write",title:"Writing data to Glean",description:"This page describes the various ways in which data gets into 
Glean.",source:"@site/docs/write.md",sourceDirName:".",slug:"/write",permalink:"/docs/write",draft:!1,editUrl:"https://github.com/facebookincubator/Glean/tree/main/glean/website/docs/write.md",tags:[],version:"current",frontMatter:{id:"write",title:"Writing data to Glean",sidebar_label:"Writing data to Glean"},sidebar:"someSidebar",previous:{title:"Glean Databases",permalink:"/docs/databases"},next:{title:"Running the Tools",permalink:"/docs/running"}},c={},m=[{value:"Client-driven writing",id:"client-driven-writing",level:2},{value:"Server-driven writing",id:"server-driven-writing",level:2},{value:"APIs for writing",id:"apis-for-writing",level:2},{value:"Writing from the command line",id:"writing-from-the-command-line",level:2},{value:"JSON format",id:"json-format",level:3},{value:"Loading a DB from JSON in the shell",id:"loading-a-db-from-json-in-the-shell",level:3},{value:"Creating a database using the command line",id:"creating-a-database-using-the-command-line",level:3}],u=e=>function(n){return console.warn("Component "+e+" was not imported, exported, or provided by MDXProvider as global scope"),(0,i.mdx)("div",n)},f=u("Scribe"),p=u("Backup"),h={toc:m};function x(e){let{components:n,...t}=e;return(0,i.mdx)("wrapper",(0,a.Z)({},h,t,{components:n,mdxType:"MDXLayout"}),(0,i.mdx)("p",null,"This page describes the various ways in which data gets into Glean."),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("p",null,"For a complete walkthrough of the steps necessary to write an indexer, see ",(0,i.mdx)("a",{parentName:"p",href:"https://www.internalfb.com/intern/wiki/Glean/How_to:_write_a_Glean_indexer/"},"How to write a Glean indexer"),".")),(0,i.mdx)("p",null,"There are two main methods for creating a DB. 
Repo-wide indexing jobs\nwhich require multiple workers and have dependent tasks are managed by\nthe server, while simple one-off DB creation can be performed\nindependently by a single client."),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("p",null,"After the data is ingested by the write tier (",(0,i.mdx)("inlineCode",{parentName:"p"},"glean.write"),"), it is backed up and copied to the read tier (",(0,i.mdx)("inlineCode",{parentName:"p"},"glean"),") for efficient access. For newly created DB names, check out ",(0,i.mdx)("a",{parentName:"p",href:"https://www.internalfb.com/intern/wiki/Glean/Write/#configuring-db-backup-an"},"the section below")," for configuring this behavior.")),(0,i.mdx)("h2",{id:"client-driven-writing"},"Client-driven writing"),(0,i.mdx)("p",null,"A database can be created by a client using any of these methods:"),(0,i.mdx)("ol",null,(0,i.mdx)("li",{parentName:"ol"},"Programmatically, using one of the APIs listed in ",(0,i.mdx)("a",{parentName:"li",href:"#apis-for-writing"},"APIs for Writing"),"."),(0,i.mdx)("li",{parentName:"ol"},"On the command line: invoke the ",(0,i.mdx)("inlineCode",{parentName:"li"},"glean")," command-line tool to send data in JSON format, see ",(0,i.mdx)("a",{parentName:"li",href:"#creating-a-database-using-the-command-line"}," Creating a database using the command line"),"."),(0,i.mdx)("li",{parentName:"ol"},"In the shell, use ",(0,i.mdx)("inlineCode",{parentName:"li"},"glean shell --db-root=")," and then use the command ",(0,i.mdx)("inlineCode",{parentName:"li"},":load")," to create a DB from a JSON file. 
See ",(0,i.mdx)("a",{parentName:"li",href:"#loading-a-db-from-json-in-the-shell"},"Loading a DB from JSON in the shell"),".")),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("ol",{start:4},(0,i.mdx)("li",{parentName:"ol"},"Via Scribe, see ",(0,i.mdx)("a",{parentName:"li",href:"#writing-data-using-scribe"},"Writing data using Scribe")))),(0,i.mdx)("h2",{id:"server-driven-writing"},"Server-driven writing"),(0,i.mdx)("p",null,"Large indexing jobs are coordinated by the server, using a ",(0,i.mdx)("em",{parentName:"p"},"recipe")," to\ndefine the various tasks and the dependencies between them. Recipes\nare defined in the recipes configuration; see the ",(0,i.mdx)("inlineCode",{parentName:"p"},"--recipe-config"),"\noption in ",(0,i.mdx)("a",{parentName:"p",href:"/docs/running#common-options"},"Common options"),"."),(0,i.mdx)("p",null,"The job proceeds as follows:"),(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("p",{parentName:"li"},"An indexing job is started by calling the server's ",(0,i.mdx)("inlineCode",{parentName:"p"},"kickOff")," Thrift\nmethod. This creates a work queue of tasks on the server.")),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("p",{parentName:"li"},"Clients obtain tasks from the server by calling ",(0,i.mdx)("inlineCode",{parentName:"p"},"getWork"),". 
Tasks may\nhave dependencies between them, so the server won't hand out a task\nuntil its dependencies are complete.")),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("p",{parentName:"li"},"When all tasks are done, the server marks the database as complete."))),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("p",null,"For the fbsource indexer, the components of this are:"),(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},"The coordinator, run by a ",(0,i.mdx)("a",{parentName:"li",href:"https://www.internalfb.com/intern/chronos/job/?jobname=glean.clang.indexer&smc=chronos_gp_admin_client"},"chronos job"),", which calls ",(0,i.mdx)("inlineCode",{parentName:"li"},"kickoff")," and then waits for completion."),(0,i.mdx)("li",{parentName:"ul"},"The workers which poll the server (see ",(0,i.mdx)("a",{parentName:"li",href:"https://www.internalfb.com/intern/wiki/Glean/ClangIndexer/"},"Glean/ClangIndexer"),")"),(0,i.mdx)("li",{parentName:"ul"},"The server (see ",(0,i.mdx)("a",{parentName:"li",href:"https://www.internalfb.com/intern/wiki/Glean/Infrastructure/"},"Glean/Infrastructure"),")"))),(0,i.mdx)("h2",{id:"apis-for-writing"},"APIs for writing"),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},"The C++ writing API is the most performant. It is used by the clang-based indexer for C++ and Objective C code. 
See ",(0,i.mdx)("a",{parentName:"li",href:"https://phabricator.intern.facebook.com/diffusion/FBS/browse/master/fbcode/glean/cpp/glean.h"},"glean/cpp/glean.h")),(0,i.mdx)("li",{parentName:"ul"},"In Hack, ",(0,i.mdx)("a",{parentName:"li",href:"https://www.internalfb.com/intern/codex/symbol/php/Glean/genKickOffForHandle/"},"genKickOffForHandle")," and the various functions for writing facts."))),(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},"The Haskell API for writing",(0,i.mdx)("ul",{parentName:"li"},(0,i.mdx)("li",{parentName:"ul"},"Example: ",(0,i.mdx)(o.O1,{file:"glean/client/hs/example/ExampleWriter.hs",mdxType:"SrcFile"}))))),(0,i.mdx)("p",null,"If none of the above work for you, the Thrift API enable basic write\naccess to the database."),(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"kickOff")," can be used to create a new DB"),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"sendJsonBatch")," is for sending facts in JSON-serialized form"),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"finishBatch")," exposes the result of a previously sent JSON batch"),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"workFinished")," closes a DB")),(0,i.mdx)("p",null,"A rough outline of a client looks like:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},"glean = make_glean_thrift_client()\ndb_handle = make_uuid()\nglean.kickOff(my_repo, KickOffFill(writeHandle=db_handle))\nfor json_batch in json_batches:\n handle = glean.sendJsonBatch(json_batch)\n result = glean.finishBatch(handle)\n # handle result\nglean.workFinished(my_repo, db_handle, success_or_failure)\n")),(0,i.mdx)("h2",{id:"writing-from-the-command-line"},"Writing from the command line"),(0,i.mdx)("h3",{id:"json-format"},"JSON format"),(0,i.mdx)("p",null,"The JSON format for Glean data is described in ",(0,i.mdx)("a",{parentName:"p",href:"/docs/schema/thrift"},"Thrift 
and JSON"),"."),(0,i.mdx)("p",null,"Here's an example of JSON data for writing to Glean:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},'[\n { "predicate": "cxx1.Name.1", # define facts for cxx1.Name.1\n "facts": [\n { "id": 1, "key": "abc" }, # define a fact with id 1\n { "id": 2, "key": "def" }\n ]\n },\n { "predicate": "cxx1.FunctionName.1", # define facts for cxx1.FunctionName.1\n "facts": [\n { "id": 3,\n "key": {\n "name": { "id": 1 }}} # reference to fact with id 1\n ]\n },\n { "predicate": "cxx1.FunctionQName.1", # define facts for cxx1.FunctionQName.1\n "facts": [\n { "key": {\n "name": 3, # 3 is shorthand for { "id": 3 }\n "scope": { "global_": {} } } },\n { "key": {\n "name": {\n "key": { # define a nested fact directly\n "name": {\n "key": "ghi" }}}, # another nested fact\n "scope": {\n "namespace_": {\n "key": {\n "name": {\n "key": "std" }}}}}\n ]\n }\n]\n')),(0,i.mdx)("p",null,"The rules of the game are:"),(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},"Predicate names must include versions, i.e. ",(0,i.mdx)("inlineCode",{parentName:"li"},"cxx1.Name.1")," rather than ",(0,i.mdx)("inlineCode",{parentName:"li"},"cxx1.Name"),"."),(0,i.mdx)("li",{parentName:"ul"},"The ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," field when defining a fact is optional. 
The ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," numbers in the input file will ",(0,i.mdx)("em",{parentName:"li"},"not")," be the final ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," numbers assigned to the facts in the database."),(0,i.mdx)("li",{parentName:"ul"},"There are no restrictions on ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," values (any 64-bit integer will do) but an ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," value may not be reused within a file."),(0,i.mdx)("li",{parentName:"ul"},"Later facts may refer to earlier ones using either ",(0,i.mdx)("inlineCode",{parentName:"li"},'{ "id": N }')," or just ",(0,i.mdx)("inlineCode",{parentName:"li"},"N"),"."),(0,i.mdx)("li",{parentName:"ul"},"It is only possible to refer to ",(0,i.mdx)("inlineCode",{parentName:"li"},"id"),"s from facts in the same file, if you are writing multiple files using ",(0,i.mdx)("inlineCode",{parentName:"li"},"glean write")," or via the ",(0,i.mdx)("inlineCode",{parentName:"li"},"sendJsonBatch")," API."),(0,i.mdx)("li",{parentName:"ul"},"a nested facts can be defined inline, instead of defining it with an ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," first and then referencing it."),(0,i.mdx)("li",{parentName:"ul"},"an inline nested fact can be given an ",(0,i.mdx)("inlineCode",{parentName:"li"},"id")," and referred to later.")),(0,i.mdx)("h3",{id:"loading-a-db-from-json-in-the-shell"},"Loading a DB from JSON in the shell"),(0,i.mdx)("p",null,"The shell is useful for experimenting with creating a DB from JSON data directly. 
Let's try loading the data above into a DB in the shell:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},"$ mkdir /tmp/glean\n$ glean shell --db-root /tmp/glean\nGlean Shell, dev mode\ntype :help for help.\nno fbsource database availabe\n> :load test/0 /home/smarlow/test\nI0514 01:19:37.137109 3566745 Work.hs:184] test/16: database complete\n")),(0,i.mdx)("p",null,"Let's see what facts we loaded:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},"test> :stat\n1\n count: 72\n size: 5988\ncxx1.FunctionName.1\n count: 2\n size: 66\ncxx1.FunctionQName.1\n count: 2\n size: 70\ncxx1.Name.1\n count: 4\n size: 148\ncxx1.NamespaceQName.1\n count: 1\n size: 35\ntest>\n")),(0,i.mdx)("p",null,"Note that there were 4 ",(0,i.mdx)("inlineCode",{parentName:"p"},"cxx1.Name.1")," facts - some of those were defined as inline nested facts in the JSON. We can query them all:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},'test> cxx1.Name _\n4 results, 1 queries, 4 facts, 0.22ms, 44296 bytes\n\n{ "id": 1096, "key": "abc" }\n{ "id": 1097, "key": "def" }\n{ "id": 1100, "key": "ghi" }\n{ "id": 1102, "key": "std" }\n')),(0,i.mdx)("p",null,"Note that the ",(0,i.mdx)("inlineCode",{parentName:"p"},"id")," values here do not correspond to the ",(0,i.mdx)("inlineCode",{parentName:"p"},"id")," values in the input file."),(0,i.mdx)("h3",{id:"creating-a-database-using-the-command-line"},"Creating a database using the command line"),(0,i.mdx)("p",null,"The ",(0,i.mdx)("inlineCode",{parentName:"p"},"glean")," command-line tool can be used to create a database directly on the server."),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("p",null,"There is a default retention policy for databases created this way; for details and to discuss your requirements, talk to the Glean team before creating databases.")),(0,i.mdx)("p",null,"To create a database from a single file of JSON facts:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},"glean create 
--service --finish --db / \n")),(0,i.mdx)("p",null,"where"),(0,i.mdx)(r.FbInternalOnly,{mdxType:"FbInternalOnly"},(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"")," can be ",(0,i.mdx)("inlineCode",{parentName:"li"},"glean.write.test")," for testing. ",(0,i.mdx)("inlineCode",{parentName:"li"},"glean.write")," is the production write tier."))),(0,i.mdx)(r.OssOnly,{mdxType:"OssOnly"},(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"")," is the ",(0,i.mdx)("inlineCode",{parentName:"li"},"host:port")," of the Glean server"))),(0,i.mdx)("ul",null,(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"")," is the name for your DB. For indexing repositories we normally use the name of the repository, but it's just a string, so you can use whatever you want."),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"")," identifies this particular instance of your database. For repositories we normally use the revision hash, but, again, it's just a string."),(0,i.mdx)("li",{parentName:"ul"},(0,i.mdx)("inlineCode",{parentName:"li"},"")," the file containing the JSON facts.")),(0,i.mdx)("p",null,"If the file is more than, say, 100MB, this operation will probably time out sending the data to the server. 
To send large amounts of data you need to batch it up into multiple files, and then send it like this:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},"glean create --service --db /\nglean write --service --db / \nglean write --service --db / \n...\nglean finish --service --db /\n")),(0,i.mdx)("p",null,"To find out if your DB made it:"),(0,i.mdx)("pre",null,(0,i.mdx)("code",{parentName:"pre"},"glean shell --service :list\n")),(0,i.mdx)("p",null,"This will list the DBs available on the write server."),(0,i.mdx)(f,{mdxType:"Scribe"}),(0,i.mdx)(p,{mdxType:"Backup"}))}x.isMDXComponent=!0},47596:function(e,n,t){var a=this&&this.__awaiter||function(e,n,t,a){return new(t||(t=Promise))((function(i,r){function o(e){try{d(a.next(e))}catch(n){r(n)}}function l(e){try{d(a.throw(e))}catch(n){r(n)}}function d(e){var n;e.done?i(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(o,l)}d((a=a.apply(e,n||[])).next())}))};Object.defineProperty(n,"__esModule",{value:!0}),n.getSpecInfo=void 0;const i=t(11737);n.getSpecInfo=function(e){return a(this,void 0,void 0,(function*(){return yield i.call({module:"bloks",api:"getSpecInfo",args:{styleId:e}})}))}},11737:function(e,n){var t=this&&this.__awaiter||function(e,n,t,a){return new(t||(t=Promise))((function(i,r){function o(e){try{d(a.next(e))}catch(n){r(n)}}function l(e){try{d(a.throw(e))}catch(n){r(n)}}function d(e){var n;e.done?i(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(o,l)}d((a=a.apply(e,n||[])).next())}))};Object.defineProperty(n,"__esModule",{value:!0}),n.call=void 0;let a=!1,i=0;const r={};n.call=function(e){return t(this,void 0,void 0,(function*(){if("staticdocs.thefacebook.com"!==window.location.hostname&&"localhost"!==window.location.hostname)return Promise.reject(new Error("Not running on static docs"));a||(a=!0,window.addEventListener("message",(e=>{if("static-docs-bridge-response"!==e.data.event)return;const n=e.data.id;n in r||console.error(`Recieved response for id: 
${n} with no matching receiver`),"response"in e.data?r[n].resolve(e.data.response):r[n].reject(new Error(e.data.error)),delete r[n]})));const n=i++,t=new Promise(((e,t)=>{r[n]={resolve:e,reject:t}})),o={event:"static-docs-bridge-call",id:n,module:e.module,api:e.api,args:e.args},l="localhost"===window.location.hostname?"*":"https://www.internalfb.com";return window.parent.postMessage(o,l),t}))}},24855:function(e,n,t){var a=this&&this.__awaiter||function(e,n,t,a){return new(t||(t=Promise))((function(i,r){function o(e){try{d(a.next(e))}catch(n){r(n)}}function l(e){try{d(a.throw(e))}catch(n){r(n)}}function d(e){var n;e.done?i(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(o,l)}d((a=a.apply(e,n||[])).next())}))};Object.defineProperty(n,"__esModule",{value:!0}),n.reportFeatureUsage=n.reportContentCopied=void 0;const i=t(11737);n.reportContentCopied=function(e){return a(this,void 0,void 0,(function*(){const{textContent:n}=e;try{yield i.call({module:"feedback",api:"reportContentCopied",args:{textContent:n}})}catch(t){}}))},n.reportFeatureUsage=function(e){return a(this,void 0,void 0,(function*(){const{featureName:n,id:t}=e;console.log("used feature");try{yield i.call({module:"feedback",api:"reportFeatureUsage",args:{featureName:n,id:t}})}catch(a){}}))}},44256:function(e,n,t){var a=this&&this.__createBinding||(Object.create?function(e,n,t,a){void 0===a&&(a=t),Object.defineProperty(e,a,{enumerable:!0,get:function(){return n[t]}})}:function(e,n,t,a){void 0===a&&(a=t),e[a]=n[t]}),i=this&&this.__setModuleDefault||(Object.create?function(e,n){Object.defineProperty(e,"default",{enumerable:!0,value:n})}:function(e,n){e.default=n}),r=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var n={};if(null!=e)for(var t in e)"default"!==t&&Object.prototype.hasOwnProperty.call(e,t)&&a(n,e,t);return 
i(n,e),n};Object.defineProperty(n,"__esModule",{value:!0}),n.OssOnly=n.FbInternalOnly=n.getEphemeralDiffNumber=n.hasEphemeralDiffNumber=n.isInternal=n.validateFbContentArgs=n.fbInternalOnly=n.fbContent=n.inpageeditor=n.feedback=n.uidocs=n.bloks=void 0,n.bloks=r(t(47596)),n.uidocs=r(t(17483)),n.feedback=r(t(24855)),n.inpageeditor=r(t(27312));const o=["internal","external"];function l(e){return s(e),c()?"internal"in e?d(e.internal):[]:"external"in e?d(e.external):[]}function d(e){return"function"==typeof e?e():e}function s(e){if("object"!=typeof e)throw new Error(`fbContent() args must be an object containing keys: ${o}. Instead got ${e}`);if(!Object.keys(e).find((e=>o.find((n=>n===e)))))throw new Error(`No valid args found in ${JSON.stringify(e)}. Accepted keys: ${o}`);const n=Object.keys(e).filter((e=>!o.find((n=>n===e))));if(n.length>0)throw new Error(`Unexpected keys ${n} found in fbContent() args. Accepted keys: ${o}`)}function c(){try{return Boolean(!1)}catch(e){return console.log("process.env.FB_INTERNAL couldn't be read, maybe you forgot to add the required webpack EnvironmentPlugin config?",e),!1}}function m(){try{return null}catch(e){return console.log("process.env.PHABRICATOR_DIFF_NUMBER couldn't be read, maybe you forgot to add the required webpack EnvironmentPlugin config?",e),null}}n.fbContent=l,n.fbInternalOnly=function(e){return l({internal:e})},n.validateFbContentArgs=s,n.isInternal=c,n.hasEphemeralDiffNumber=function(){return Boolean(m())},n.getEphemeralDiffNumber=m,n.FbInternalOnly=function(e){return c()?e.children:null},n.OssOnly=function(e){return c()?null:e.children}},27312:function(e,n,t){var a=this&&this.__awaiter||function(e,n,t,a){return new(t||(t=Promise))((function(i,r){function o(e){try{d(a.next(e))}catch(n){r(n)}}function l(e){try{d(a.throw(e))}catch(n){r(n)}}function d(e){var n;e.done?i(e.value):(n=e.value,n instanceof t?n:new 
t((function(e){e(n)}))).then(o,l)}d((a=a.apply(e,n||[])).next())}))};Object.defineProperty(n,"__esModule",{value:!0}),n.submitDiff=void 0;const i=t(11737);n.submitDiff=function(e){return a(this,void 0,void 0,(function*(){const{file_path:n,new_content:t,project_name:a,diff_number:r}=e;try{return yield i.call({module:"inpageeditor",api:"createPhabricatorDiffApi",args:{file_path:n,new_content:t,project_name:a,diff_number:r}})}catch(o){throw new Error(`Error occurred while trying to submit diff. Stack trace: ${o}`)}}))}},17483:function(e,n,t){var a=this&&this.__awaiter||function(e,n,t,a){return new(t||(t=Promise))((function(i,r){function o(e){try{d(a.next(e))}catch(n){r(n)}}function l(e){try{d(a.throw(e))}catch(n){r(n)}}function d(e){var n;e.done?i(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(o,l)}d((a=a.apply(e,n||[])).next())}))};Object.defineProperty(n,"__esModule",{value:!0}),n.getApi=n.docsets=void 0;const i=t(11737);n.docsets={BLOKS_CORE:"887372105406659"},n.getApi=function(e){return a(this,void 0,void 0,(function*(){const{name:n,framework:t,docset:a}=e;return yield i.call({module:"uidocs",api:"getApi",args:{name:n,framework:t,docset:a}})}))}}}]); \ No newline at end of file diff --git a/assets/js/runtime~main.67e379df.js b/assets/js/runtime~main.9a2f0b03.js similarity index 99% rename from assets/js/runtime~main.67e379df.js rename to assets/js/runtime~main.9a2f0b03.js index f03ffe4a7..7c08ac724 100644 --- a/assets/js/runtime~main.67e379df.js +++ b/assets/js/runtime~main.9a2f0b03.js @@ -1 +1 @@ -(()=>{"use strict";var e,a,d,c,f,t={},b={};function r(e){var a=b[e];if(void 0!==a)return a.exports;var d=b[e]={id:e,loaded:!1,exports:{}};return t[e].call(d.exports,d,d.exports,r),d.loaded=!0,d.exports}r.m=t,r.c=b,e=[],r.O=(a,d,c,f)=>{if(!d){var t=1/0;for(i=0;i=f)&&Object.keys(r.O).every((e=>r.O[e](d[o])))?d.splice(o--,1):(b=!1,f0&&e[i-1][2]>f;i--)e[i]=e[i-1];e[i]=[d,c,f]},r.n=e=>{var a=e&&e.__esModule?()=>e.default:()=>e;return 
r.d(a,{a:a}),a},d=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,r.t=function(e,c){if(1&c&&(e=this(e)),8&c)return e;if("object"==typeof e&&e){if(4&c&&e.__esModule)return e;if(16&c&&"function"==typeof e.then)return e}var f=Object.create(null);r.r(f);var t={};a=a||[null,d({}),d([]),d(d)];for(var b=2&c&&e;"object"==typeof b&&!~a.indexOf(b);b=d(b))Object.getOwnPropertyNames(b).forEach((a=>t[a]=()=>e[a]));return t.default=()=>e,r.d(f,t),f},r.d=(e,a)=>{for(var d in a)r.o(a,d)&&!r.o(e,d)&&Object.defineProperty(e,d,{enumerable:!0,get:a[d]})},r.f={},r.e=e=>Promise.all(Object.keys(r.f).reduce(((a,d)=>(r.f[d](e,a),a)),[])),r.u=e=>"assets/js/"+({53:"935f2afb",533:"b2b675dd",684:"af94d498",695:"0b5ee478",702:"e72a3ded",734:"f349a764",949:"eb83943b",1218:"2dc45ced",1477:"b2f554cd",1713:"a7023ddc",1953:"3e65d5b4",2535:"814f3328",2597:"373392d9",2642:"a81f2d1d",2731:"473435fa",3089:"a6aa9e1f",3106:"581d6198",3345:"ca95d5ad",3608:"9e4087bc",3627:"7c10977a",3824:"9d050fe4",3827:"37fc9d46",3988:"cfd71120",4013:"01a85c17",4128:"a09c2993",4195:"c4f5d8e4",4224:"5c4c46e6",4282:"31d6d5ed",4338:"41206b0e",4468:"1a20bc57",4648:"d558f29a",4904:"b0b0b448",5307:"af32bd62",5310:"476d6aec",5622:"f228e963",5628:"8293a772",5917:"21418ead",5925:"b8b1253e",5943:"a5dc57e5",6103:"ccc49370",6166:"2dfecbce",6412:"b16bf7d2",6439:"2651e53d",6587:"5e677a75",6910:"27315305",6982:"90f022e0",7023:"31fe93dc",7187:"246b6efd",7586:"d22a0e6a",7698:"abda9da4",7918:"17896441",8325:"33b5e0ca",8490:"6b032a97",8528:"5baf5c08",8610:"6875c492",9357:"391ef999",9403:"cc8d6d7f",9407:"ea32bb6f",9428:"283d7b21",9514:"1be78505",9519:"432e378d",9607:"c83e76ae",9675:"14a2b9f8",9704:"60691868"}[e]||e)+"."+{53:"9119e8a8",533:"37b72b08",684:"e1bf73db",695:"9489cc6b",702:"39d9265d",734:"9a7ac39c",949:"31f84837",1218:"b7bd7c4e",1477:"942e1482",1713:"9e1bdfb8",1953:"48c54bb8",2535:"714d8b90",2597:"29477b3f",2642:"253b6a55",2731:"e9977d9e",3089:"2f29c6cf",3106:"c5e44c63",3345:"fffa9184",3608:"62af1a24",3627:"cb3a5eb2"
,3824:"382fbf8f",3827:"2c5cec8d",3988:"29a57898",4013:"fc514125",4128:"c3a045a0",4195:"c2e74945",4224:"7eabc49e",4282:"52a7d976",4338:"ced36b31",4468:"07c5b6e2",4648:"624c5a42",4904:"e681fb9f",4972:"c98eee24",5307:"55a39fc6",5310:"efb5df1c",5622:"fa7e970f",5628:"df9f83db",5917:"dcdf8c9d",5925:"3745c099",5943:"cd2b7b08",6103:"ae691c92",6166:"1c4e87bb",6412:"0087e5e1",6439:"6af9bd8e",6587:"081a0151",6910:"edee02ea",6921:"f20e80a2",6982:"16ee69ab",7023:"d080ed08",7187:"013bd076",7586:"e17700ba",7698:"009f7f7a",7918:"cbaf46f4",8325:"a6308c17",8490:"7ec6431f",8528:"6656e233",8610:"6363f22a",9357:"8149209a",9403:"60d7af99",9407:"c7edbf91",9428:"894dddce",9514:"c14ec086",9519:"8e24ccc5",9607:"a7dff9eb",9675:"a113c165",9704:"985278fb"}[e]+".js",r.miniCssF=e=>{},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,a)=>Object.prototype.hasOwnProperty.call(e,a),c={},f="website:",r.l=(e,a,d,t)=>{if(c[e])c[e].push(a);else{var b,o;if(void 0!==d)for(var n=document.getElementsByTagName("script"),i=0;i{b.onerror=b.onload=null,clearTimeout(s);var f=c[e];if(delete c[e],b.parentNode&&b.parentNode.removeChild(b),f&&f.forEach((e=>e(d))),a)return a(d)},s=setTimeout(u.bind(null,void 0,{type:"timeout",target:b}),12e4);b.onerror=u.bind(null,b.onerror),b.onload=u.bind(null,b.onload),o&&document.head.appendChild(b)}},r.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.nmd=e=>(e.paths=[],e.children||(e.children=[]),e),r.p="/",r.gca=function(e){return 
e={17896441:"7918",27315305:"6910",60691868:"9704","935f2afb":"53",b2b675dd:"533",af94d498:"684","0b5ee478":"695",e72a3ded:"702",f349a764:"734",eb83943b:"949","2dc45ced":"1218",b2f554cd:"1477",a7023ddc:"1713","3e65d5b4":"1953","814f3328":"2535","373392d9":"2597",a81f2d1d:"2642","473435fa":"2731",a6aa9e1f:"3089","581d6198":"3106",ca95d5ad:"3345","9e4087bc":"3608","7c10977a":"3627","9d050fe4":"3824","37fc9d46":"3827",cfd71120:"3988","01a85c17":"4013",a09c2993:"4128",c4f5d8e4:"4195","5c4c46e6":"4224","31d6d5ed":"4282","41206b0e":"4338","1a20bc57":"4468",d558f29a:"4648",b0b0b448:"4904",af32bd62:"5307","476d6aec":"5310",f228e963:"5622","8293a772":"5628","21418ead":"5917",b8b1253e:"5925",a5dc57e5:"5943",ccc49370:"6103","2dfecbce":"6166",b16bf7d2:"6412","2651e53d":"6439","5e677a75":"6587","90f022e0":"6982","31fe93dc":"7023","246b6efd":"7187",d22a0e6a:"7586",abda9da4:"7698","33b5e0ca":"8325","6b032a97":"8490","5baf5c08":"8528","6875c492":"8610","391ef999":"9357",cc8d6d7f:"9403",ea32bb6f:"9407","283d7b21":"9428","1be78505":"9514","432e378d":"9519",c83e76ae:"9607","14a2b9f8":"9675"}[e]||e,r.p+r.u(e)},(()=>{var e={1303:0,532:0};r.f.j=(a,d)=>{var c=r.o(e,a)?e[a]:void 0;if(0!==c)if(c)d.push(c[2]);else if(/^(1303|532)$/.test(a))e[a]=0;else{var f=new Promise(((d,f)=>c=e[a]=[d,f]));d.push(c[2]=f);var t=r.p+r.u(a),b=new Error;r.l(t,(d=>{if(r.o(e,a)&&(0!==(c=e[a])&&(e[a]=void 0),c)){var f=d&&("load"===d.type?"missing":d.type),t=d&&d.target&&d.target.src;b.message="Loading chunk "+a+" failed.\n("+f+": "+t+")",b.name="ChunkLoadError",b.type=f,b.request=t,c[1](b)}}),"chunk-"+a,a)}},r.O.j=a=>0===e[a];var a=(a,d)=>{var c,f,t=d[0],b=d[1],o=d[2],n=0;if(t.some((a=>0!==e[a]))){for(c in b)r.o(b,c)&&(r.m[c]=b[c]);if(o)var i=o(r)}for(a&&a(d);n{"use strict";var e,a,d,c,f,t={},b={};function r(e){var a=b[e];if(void 0!==a)return a.exports;var d=b[e]={id:e,loaded:!1,exports:{}};return t[e].call(d.exports,d,d.exports,r),d.loaded=!0,d.exports}r.m=t,r.c=b,e=[],r.O=(a,d,c,f)=>{if(!d){var 
t=1/0;for(i=0;i=f)&&Object.keys(r.O).every((e=>r.O[e](d[o])))?d.splice(o--,1):(b=!1,f0&&e[i-1][2]>f;i--)e[i]=e[i-1];e[i]=[d,c,f]},r.n=e=>{var a=e&&e.__esModule?()=>e.default:()=>e;return r.d(a,{a:a}),a},d=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,r.t=function(e,c){if(1&c&&(e=this(e)),8&c)return e;if("object"==typeof e&&e){if(4&c&&e.__esModule)return e;if(16&c&&"function"==typeof e.then)return e}var f=Object.create(null);r.r(f);var t={};a=a||[null,d({}),d([]),d(d)];for(var b=2&c&&e;"object"==typeof b&&!~a.indexOf(b);b=d(b))Object.getOwnPropertyNames(b).forEach((a=>t[a]=()=>e[a]));return t.default=()=>e,r.d(f,t),f},r.d=(e,a)=>{for(var d in a)r.o(a,d)&&!r.o(e,d)&&Object.defineProperty(e,d,{enumerable:!0,get:a[d]})},r.f={},r.e=e=>Promise.all(Object.keys(r.f).reduce(((a,d)=>(r.f[d](e,a),a)),[])),r.u=e=>"assets/js/"+({53:"935f2afb",533:"b2b675dd",684:"af94d498",695:"0b5ee478",702:"e72a3ded",734:"f349a764",949:"eb83943b",1218:"2dc45ced",1477:"b2f554cd",1713:"a7023ddc",1953:"3e65d5b4",2535:"814f3328",2597:"373392d9",2642:"a81f2d1d",2731:"473435fa",3089:"a6aa9e1f",3106:"581d6198",3345:"ca95d5ad",3608:"9e4087bc",3627:"7c10977a",3824:"9d050fe4",3827:"37fc9d46",3988:"cfd71120",4013:"01a85c17",4128:"a09c2993",4195:"c4f5d8e4",4224:"5c4c46e6",4282:"31d6d5ed",4338:"41206b0e",4468:"1a20bc57",4648:"d558f29a",4904:"b0b0b448",5307:"af32bd62",5310:"476d6aec",5622:"f228e963",5628:"8293a772",5917:"21418ead",5925:"b8b1253e",5943:"a5dc57e5",6103:"ccc49370",6166:"2dfecbce",6412:"b16bf7d2",6439:"2651e53d",6587:"5e677a75",6910:"27315305",6982:"90f022e0",7023:"31fe93dc",7187:"246b6efd",7586:"d22a0e6a",7698:"abda9da4",7918:"17896441",8325:"33b5e0ca",8490:"6b032a97",8528:"5baf5c08",8610:"6875c492",9357:"391ef999",9403:"cc8d6d7f",9407:"ea32bb6f",9428:"283d7b21",9514:"1be78505",9519:"432e378d",9607:"c83e76ae",9675:"14a2b9f8",9704:"60691868"}[e]||e)+"."+{53:"9119e8a8",533:"37b72b08",684:"e1bf73db",695:"9489cc6b",702:"39d9265d",734:"9a7ac39c",949:"31f84837",1218:"b7bd7c4e",1477
:"942e1482",1713:"9e1bdfb8",1953:"48c54bb8",2535:"714d8b90",2597:"29477b3f",2642:"253b6a55",2731:"e9977d9e",3089:"2f29c6cf",3106:"c5e44c63",3345:"11f5a106",3608:"62af1a24",3627:"cb3a5eb2",3824:"382fbf8f",3827:"2c5cec8d",3988:"29a57898",4013:"fc514125",4128:"c3a045a0",4195:"c2e74945",4224:"7eabc49e",4282:"52a7d976",4338:"ced36b31",4468:"07c5b6e2",4648:"624c5a42",4904:"e681fb9f",4972:"c98eee24",5307:"55a39fc6",5310:"efb5df1c",5622:"fa7e970f",5628:"df9f83db",5917:"dcdf8c9d",5925:"3745c099",5943:"cd2b7b08",6103:"ae691c92",6166:"1c4e87bb",6412:"0087e5e1",6439:"6af9bd8e",6587:"081a0151",6910:"edee02ea",6921:"f20e80a2",6982:"16ee69ab",7023:"d080ed08",7187:"013bd076",7586:"e17700ba",7698:"009f7f7a",7918:"cbaf46f4",8325:"a6308c17",8490:"7ec6431f",8528:"6656e233",8610:"6363f22a",9357:"8149209a",9403:"60d7af99",9407:"c7edbf91",9428:"894dddce",9514:"c14ec086",9519:"8e24ccc5",9607:"a7dff9eb",9675:"a113c165",9704:"985278fb"}[e]+".js",r.miniCssF=e=>{},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,a)=>Object.prototype.hasOwnProperty.call(e,a),c={},f="website:",r.l=(e,a,d,t)=>{if(c[e])c[e].push(a);else{var b,o;if(void 0!==d)for(var n=document.getElementsByTagName("script"),i=0;i{b.onerror=b.onload=null,clearTimeout(s);var f=c[e];if(delete c[e],b.parentNode&&b.parentNode.removeChild(b),f&&f.forEach((e=>e(d))),a)return a(d)},s=setTimeout(u.bind(null,void 0,{type:"timeout",target:b}),12e4);b.onerror=u.bind(null,b.onerror),b.onload=u.bind(null,b.onload),o&&document.head.appendChild(b)}},r.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.nmd=e=>(e.paths=[],e.children||(e.children=[]),e),r.p="/",r.gca=function(e){return 
e={17896441:"7918",27315305:"6910",60691868:"9704","935f2afb":"53",b2b675dd:"533",af94d498:"684","0b5ee478":"695",e72a3ded:"702",f349a764:"734",eb83943b:"949","2dc45ced":"1218",b2f554cd:"1477",a7023ddc:"1713","3e65d5b4":"1953","814f3328":"2535","373392d9":"2597",a81f2d1d:"2642","473435fa":"2731",a6aa9e1f:"3089","581d6198":"3106",ca95d5ad:"3345","9e4087bc":"3608","7c10977a":"3627","9d050fe4":"3824","37fc9d46":"3827",cfd71120:"3988","01a85c17":"4013",a09c2993:"4128",c4f5d8e4:"4195","5c4c46e6":"4224","31d6d5ed":"4282","41206b0e":"4338","1a20bc57":"4468",d558f29a:"4648",b0b0b448:"4904",af32bd62:"5307","476d6aec":"5310",f228e963:"5622","8293a772":"5628","21418ead":"5917",b8b1253e:"5925",a5dc57e5:"5943",ccc49370:"6103","2dfecbce":"6166",b16bf7d2:"6412","2651e53d":"6439","5e677a75":"6587","90f022e0":"6982","31fe93dc":"7023","246b6efd":"7187",d22a0e6a:"7586",abda9da4:"7698","33b5e0ca":"8325","6b032a97":"8490","5baf5c08":"8528","6875c492":"8610","391ef999":"9357",cc8d6d7f:"9403",ea32bb6f:"9407","283d7b21":"9428","1be78505":"9514","432e378d":"9519",c83e76ae:"9607","14a2b9f8":"9675"}[e]||e,r.p+r.u(e)},(()=>{var e={1303:0,532:0};r.f.j=(a,d)=>{var c=r.o(e,a)?e[a]:void 0;if(0!==c)if(c)d.push(c[2]);else if(/^(1303|532)$/.test(a))e[a]=0;else{var f=new Promise(((d,f)=>c=e[a]=[d,f]));d.push(c[2]=f);var t=r.p+r.u(a),b=new Error;r.l(t,(d=>{if(r.o(e,a)&&(0!==(c=e[a])&&(e[a]=void 0),c)){var f=d&&("load"===d.type?"missing":d.type),t=d&&d.target&&d.target.src;b.message="Loading chunk "+a+" failed.\n("+f+": "+t+")",b.name="ChunkLoadError",b.type=f,b.request=t,c[1](b)}}),"chunk-"+a,a)}},r.O.j=a=>0===e[a];var a=(a,d)=>{var c,f,t=d[0],b=d[1],o=d[2],n=0;if(t.some((a=>0!==e[a]))){for(c in b)r.o(b,c)&&(r.m[c]=b[c]);if(o)var i=o(r)}for(a&&a(d);n Archive | Glean - +
- + \ No newline at end of file diff --git a/blog/incremental/index.html b/blog/incremental/index.html index 5a67fb26b..5b6677cca 100644 --- a/blog/incremental/index.html +++ b/blog/incremental/index.html @@ -5,7 +5,7 @@ Incremental indexing with Glean | Glean - + @@ -89,7 +89,7 @@ ownership of x is {A} and y is {B,C} (because it is referred to from z which has owner B), so the final owner of d is {A} && {B,C}.

Tracking all this shouldn't be too expensive, but it's tricky to get right!

- + \ No newline at end of file diff --git a/blog/index.html b/blog/index.html index 7328eec65..85b1d70f3 100644 --- a/blog/index.html +++ b/blog/index.html @@ -5,7 +5,7 @@ Blog | Glean - + @@ -89,7 +89,7 @@ ownership of x is {A} and y is {B,C} (because it is referred to from z which has owner B), so the final owner of d is {A} && {B,C}.

Tracking all this shouldn't be too expensive, but it's tricky to get right!

- + \ No newline at end of file diff --git a/blog/tags/glean/index.html b/blog/tags/glean/index.html index a42776748..eae4e6923 100644 --- a/blog/tags/glean/index.html +++ b/blog/tags/glean/index.html @@ -5,7 +5,7 @@ One post tagged with "glean" | Glean - + @@ -89,7 +89,7 @@ ownership of x is {A} and y is {B,C} (because it is referred to from z which has owner B), so the final owner of d is {A} && {B,C}.

Tracking all this shouldn't be too expensive, but it's tricky to get right!

- + \ No newline at end of file diff --git a/blog/tags/incremental/index.html b/blog/tags/incremental/index.html index 7d0864d45..4e22c425f 100644 --- a/blog/tags/incremental/index.html +++ b/blog/tags/incremental/index.html @@ -5,7 +5,7 @@ One post tagged with "incremental" | Glean - + @@ -89,7 +89,7 @@ ownership of x is {A} and y is {B,C} (because it is referred to from z which has owner B), so the final owner of d is {A} && {B,C}.

Tracking all this shouldn't be too expensive, but it's tricky to get right!

- + \ No newline at end of file diff --git a/blog/tags/index.html b/blog/tags/index.html index e429a6e8d..d221da287 100644 --- a/blog/tags/index.html +++ b/blog/tags/index.html @@ -5,14 +5,14 @@ Tags | Glean - +
- + \ No newline at end of file diff --git a/docs/angle/advanced/index.html b/docs/angle/advanced/index.html index f60851e9b..963162b59 100644 --- a/docs/angle/advanced/index.html +++ b/docs/angle/advanced/index.html @@ -5,14 +5,14 @@ Advanced Query Features | Glean - +

Advanced Query Features

Types and signatures​

Angle queries are strongly typed: the server will check your query for type-safety before executing it. Type-checking ensures that the query makes sense; that it's not trying to pattern-match strings against integers, or look for a field in a record that doesn't exist for example.

Angle's type-checker isn't very clever, though. It mostly doesn't do type inference; instead, it checks that expressions have the intended type. When it doesn't know the intended type of an expression, it uses a dumb inference mode that can only infer the type when it's really obvious, such as a fact match or a string.

facts> P where C = { name = "Fish" }; example.Parent { C, P }
can't infer the type of: {name = "Fish"}
try adding a type annotation like ({name = "Fish"} : T)
or reverse the statement (Q = P instead of P = Q)

In cases like this, Angle's type-checker needs a bit of help. We can use a type signature to supply more information about the type:

facts> P where C = { name = "Fish" } : example.Class; example.Parent { C, P }
{ "id": 1024, "key": { "name": "Pet", "line": 10 } }

Here we used { name = "Fish" } : example.Class to tell Angle the expected type of the pattern. You should read the colon as "has type", and the type can be any valid Angle type; for details, see Built-in types.

Explicit fact IDs​

Every fact has an ID, which is a 64-bit integer that uniquely identifies the fact in a particular database. You've probably noticed these fact IDs in the query results: every result has an id field with the fact ID, and a key field with the fact key.

Most Angle queries don't need to mention fact IDs explicitly, but sometimes it's useful. For example, you might need to perform a query to fetch some results, do some custom filtering on the results and then query Glean again using some of the fact IDs from the first query.

WARNING: a fact ID only makes sense in the context of a particular database, so make sure that your query that mentions fact IDs is being made on the same database that you obtained the fact ID from originally.

Glean has a syntax for referring to fact IDs directly; for example

facts> $1026 : example.Class
{ "id": 1026, "key": { "name": "Fish", "line": 30 } }

the syntax is $<fact ID>, but you will often want to use it with a type signature, as $<fact ID> : <predicate>.

If you get the predicate wrong, Glean will complain:

facts> $1026 : example.Parent
*** Exception: fact has the wrong type

The type can be omitted only if it is clear from the context, for example

facts> example.Parent { child = $1026 }
{ "id": 1029, "key": { "child": { "id": 1026 }, "parent": { "id": 1024 } } }

Sometimes you might want to use multiple fact IDs in a query. Or-patterns come in handy here:

facts> example.Parent { child = $1026 | $1027 }

Functional predicates​

All the predicates we've seen so far have been key-only predicates. A predicate can also have a value; we call these functional predicates or key-value predicates.

For example, we might model a reference to a class in our example schema like this:

predicate Reference :
{ file : string, line : nat, column : nat } -> Class

This says that for a given (file,line,column) there can be at most one reference to a Class. This uniqueness is the important property of a key-value predicate: for each key there is at most one value.

We query for key-value predicates using this syntax:

facts> C where example.Reference { file = "x", line = 1, column = 2 } -> C

The pattern after the -> matches the value. It can be an arbitrary pattern, just like the key. Note that facts cannot be efficiently searched by value, so the pattern that matches the value is a filter only.

- + \ No newline at end of file diff --git a/docs/angle/debugging/index.html b/docs/angle/debugging/index.html index 8f300a1ec..6f878ea41 100644 --- a/docs/angle/debugging/index.html +++ b/docs/angle/debugging/index.html @@ -5,7 +5,7 @@ Debugging | Glean - + @@ -15,7 +15,7 @@ shell, where you can experiment with queries quickly and easily.

If you're writing particularly complex queries, then consider using Derived Predicates to structure your query and to allow parts of the query to be re-used. To iterate on derived predicates, see How do I write and test a derived predicate?

Debugging a slow query​

Performance debugging can be tricky, because Angle is a very declarative language. There are often many ways to write the query that are correct, but not all of them will be fast.

The shell provides a few facilities to help with this.

> :profile full

Turning on query profiling allows you to see how many facts of each predicate are being searched by your query. For example:

fbsource> search.cxx.SearchByNameAndScope { name = "Future" }
...
Facts searched:
cxx1.RecordDeclaration.1 : 103
cxx1.TypeAliasDeclaration.2 : 11
cxx1.QName.1 : 8
cxx1.VariableDeclaration.2 : 7
cxx1.EnumDeclaration.1 : 7
cxx1.Name.1 : 1

If your query is expensive, then likely you will see some large numbers next to one or more predicates. This is a sign that you probably want to reorder the statements in your query, or lift out some nested queries into statements so that you can control the ordering more precisely.

> :debug on

Showing the internals​

The shell provides ways to show what Glean's query engine is doing internally. This is mostly useful for those working on the query engine itself, but it might also be helpful when debugging queries.

danger

We provide no guarantees about this functionality and it might change without warning.

> :debug ir

Shows the internal representation of the query after parsing, name resolution, type checking, and various transformations to simplify it. In particular, all the nesting has been flattened at this stage, so you can see the exact order of the searches on each predicate, which might help with performance debugging.

> :debug bytecode

Shows the compiled bytecode for the query. This is what Glean's virtual machine (VM) will execute to perform the query. Probably not all that useful for debugging queries.

- + \ No newline at end of file diff --git a/docs/angle/efficiency/index.html b/docs/angle/efficiency/index.html index 612825edd..c99f2d949 100644 --- a/docs/angle/efficiency/index.html +++ b/docs/angle/efficiency/index.html @@ -5,14 +5,14 @@ Query Efficiency | Glean - +

Query Efficiency

There are two important aspects of a query that affect its efficiency:

  1. Which fields are specified in a pattern
  2. The ordering of statements

We’ll cover each of these in the following sections.

Efficient matching of facts​

The order of fields in the schema matters a lot for efficiency. Glean indexes facts by a prefix of their keys, so if we know the prefix when searching for facts this will be a lot faster. Often this difference is absolutely crucial; the difference is between O(log n) and O(n), so when the database is large this can be many orders of magnitude.

For example, the example.Parent predicate we saw earlier is defined as

predicate Parent :
{
child : Class,
parent : Class,
}

We should think of this as a mapping from child to parent. Glean won’t stop you writing a query for { parent = ... }, but such a query will examine all of the example.Parent facts in the database. We can see how many facts are searched for our query using :profile full in the shell (see debugging for more details):

facts> :profile full
facts> example.Parent { parent = { name = "Pet" }}
(snip)
2 results, 2 facts, 0.40ms, 159440 bytes, 988 compiled bytes
Facts searched:
example.Parent.1 : 3

This tells us that although it found the 2 results we expected, it searched all 3 example.Parent facts in the process.

Making queries efficient using a derived predicate​

What if we wanted to efficiently map from parent to child? That’s easy to accomplish using a derived predicate. We’re going to define a new predicate with a different field ordering, and automatically generate the facts of our new predicate by deriving them from the facts of the existing predicate. For full details see Derived Predicates; what follows is a walkthrough showing how to use a derived predicate to make our queries more efficient.

First we’ll define our derived predicate in the schema, like this:

predicate Child :
{
parent : Class,
child : Class,
}
stored
{ P, C } where Parent { C, P }

We can try this out in the shell. First we have to create a new database to hold the derived facts that is stacked on top of the old database. Drop out of the shell and run this command to create the new database:

glean create --db-root /tmp/glean/db --schema dir:/tmp/glean/schema --db derived/1 --stacked facts/1

Now start the shell again and load the stacked database. Note that we can still query facts from the original database:

> :db derived/1
derived> example.Parent _
{ "id": 1028, "key": { "child": { "id": 1025 }, "parent": { "id": 1024 } } }
{ "id": 1029, "key": { "child": { "id": 1026 }, "parent": { "id": 1024 } } }
{ "id": 1030, "key": { "child": { "id": 1027 }, "parent": { "id": 1026 } } }

Initially we have no facts of the Child predicate:

derived> example.Child _
0 results, 0 facts, 0.91ms, 812952 bytes, 664 compiled bytes

But we can create them automatically:

(TODO: check this still works, do we need a :derive command now?)

derived> * example.Child _
{ "id": 1037, "key": { "parent": { "id": 1024 }, "child": { "id": 1025 } } }
{ "id": 1038, "key": { "parent": { "id": 1024 }, "child": { "id": 1026 } } }
{ "id": 1039, "key": { "parent": { "id": 1026 }, "child": { "id": 1027 } } }

(the * means “derive and store” the facts produced by the query. To derive facts for a production database you would use either glean derive from the command line, or the appropriate Thrift API in whatever language you’re using to talk to the Glean server).

Now we have 3 facts of our derived predicate:

derived> :stat
example.Child.1
count: 3
size: 87 (87 bytes) 100.0000%

And finally we can make efficient queries to find a parent’s children:

derived> example.Child { parent = { name = "Pet" }}
{ "id": 1037, "key": { "parent": { "id": 1024 }, "child": { "id": 1025 } } }
{ "id": 1038, "key": { "parent": { "id": 1024 }, "child": { "id": 1026 } } }

2 results, 2 facts, 0.41ms, 160992 bytes, 1013 compiled bytes
Facts searched:
example.Child.1 : 2
example.Class.1 : 1

We found the correct 2 results, and only searched 2 example.Child facts.

This idea of adding extra indices to your database using derived predicates is common practice when working with Glean data, so it’s worthwhile getting familiar with it.

The order of statements is important​

Suppose we want to find the grandparent of the Goldfish class using our example schema. We would probably write it like this:

Q where
example.Parent { child = { name = "Goldfish" }, parent = P };
example.Parent { child = P, parent = Q }

Generally speaking the statements are matched top-to-bottom. For each of the facts that match the first statement, bind the variables in the pattern and then proceed with the second statement, and so on.

As written, this query works by first finding the parent of Goldfish and then finding its parent, which is exactly what we want. This query will be efficient, because both stages are matching on the first field of the example.Parent predicate.

If instead we swapped the order of the statements:

Q where
example.Parent { child = P, parent = Q };
example.Parent { child = { name = "Goldfish" }, parent = P }

The query still works, and means exactly the same thing, but it’s much less efficient. This query works as follows:

  • for each example.Parent fact, call the child P and the parent Q
  • search for an example.Parent fact with child { name = "Goldfish" } and parent P
  • if it exists, then Q is a result

This is going to involve searching all of the example.Parent facts, instead of just the ones for the parent of Goldfish.

The general rule of thumb is to do the more specific searches first. The search for example.Parent { child = { name = "Goldfish" }, parent = P } is efficient because we know the child; this binds the value of P, which makes the search for example.Parent { child = P, parent = Q } also fast.


- + \ No newline at end of file diff --git a/docs/angle/guide/index.html b/docs/angle/guide/index.html index 812a9546f..74d15971b 100644 --- a/docs/angle/guide/index.html +++ b/docs/angle/guide/index.html @@ -5,7 +5,7 @@ Angle Guide | Glean - + @@ -35,7 +35,7 @@ than it. If Y = Values[..] were outside of the negation, the meaning would be give me all X for which there is at least one Y that is not greater than it. The answer to that would be all elements.

- + \ No newline at end of file diff --git a/docs/angle/intro/index.html b/docs/angle/intro/index.html index cff999034..5ce55300e 100644 --- a/docs/angle/intro/index.html +++ b/docs/angle/intro/index.html @@ -5,7 +5,7 @@ Angle Introduction | Glean - + @@ -16,7 +16,7 @@ particularly suited for finding and extracting data from Glean.

To give you a flavour of the query language, here is how we could return the names of all the member declarations defined in a JavaScript file project/myfile.js:

N where
flow.FileDeclaration {
file = "project/myfile.js",
declaration = {
memberDecl = {
name = N
}
}
}

To learn about Angle, start with the Guide.

- + \ No newline at end of file diff --git a/docs/angle/reference/index.html b/docs/angle/reference/index.html index bd68f1619..a11335b9c 100644 --- a/docs/angle/reference/index.html +++ b/docs/angle/reference/index.html @@ -5,7 +5,7 @@ Angle Reference | Glean - + @@ -24,7 +24,7 @@ Β Β term < term
Β Β term <= term
Β Β term !== term

Standard numerical comparisons. These work on values of type nat only, and they have value {} if the comparison succeeds, otherwise they fail (in the same way as a predicate match fails if there are no facts that match the pattern).

Β Β term != term

Standard comparison between two terms of any type. It has a value of {} if the comparison succeeds, otherwise it fails in the same way as a predicate match fails if there are no facts that match the pattern.

- + \ No newline at end of file diff --git a/docs/angle/style/index.html b/docs/angle/style/index.html index b97727ec8..4b5eb57d5 100644 --- a/docs/angle/style/index.html +++ b/docs/angle/style/index.html @@ -5,14 +5,14 @@ Angle Style Guide | Glean - +

Angle Style Guide

Typical Angle style uses the following rules:

  • 2-column indentation
  • trailing commas
  • open/close braces on a line by themselves
  • camel case for record field names

e.g.

# Named parameter
type Parameter =
{
name : Name,
type : Type,
isVariadic : bool,
}

This uses quite a lot of vertical space, but it's clear and works well with source control.

It's OK to put things on a single line if they fit:

type Access = enum { Public | Protected | Private }
- + \ No newline at end of file diff --git a/docs/building/index.html b/docs/building/index.html index ea44435b6..3acfe112e 100644 --- a/docs/building/index.html +++ b/docs/building/index.html @@ -5,7 +5,7 @@ Building Glean from Source | Glean - + @@ -26,7 +26,7 @@ build and install its dependencies:

./install_deps.sh

Build Glean​

Now you can build all the Glean parts:

make

If everything worked, the tests should pass:

make test

At this point you can cabal install to install the executables into ~/.cabal/bin.

Tips for faster builds​

If you have 4 or more cores and at least 16G of RAM, you can significantly speed up the build times by passing some flags to the build stages. On a 6-core machine with 16G of RAM, you might use the following to save 50% or more of the build time:

./install_deps.sh --threads 6
make EXTRA_GHC_OPTS='-j4 +RTS -A128m -n2m -RTS'

Using clang++-12 and clang-12 as the C and C++ compilers can shave another 25% off the build time.

- + \ No newline at end of file diff --git a/docs/cli/index.html b/docs/cli/index.html index c4fb8556d..3cd8796cf 100644 --- a/docs/cli/index.html +++ b/docs/cli/index.html @@ -5,7 +5,7 @@ The Glean CLI tool | Glean - + @@ -71,7 +71,7 @@ once a database is marked complete it could be replicated, so we shouldn't be modifying it.

  • --db NAME/INSTANCE or --db-name NAME --db-instance INSTANCE
    Specifies the name and instance of the database
- + \ No newline at end of file diff --git a/docs/databases/index.html b/docs/databases/index.html index 824cd4940..5b6991f6c 100644 --- a/docs/databases/index.html +++ b/docs/databases/index.html @@ -5,7 +5,7 @@ Glean Databases | Glean - + @@ -20,7 +20,7 @@ index the current state of a source repository. The process works like this:

  • The job invokes glean create --service <write-server> <args> to create the database.

  • At this point the database is in the Incomplete state. Queries are supported in this state, and always reflect the current contents.

  • Facts are written to the database using the methods described in Writing data to Glean, and finally the database is closed by invoking glean finish --service <write-server> <args> or the appropriate Thrift method.

  • The database is now in the Complete state.

  • If backups are allowed for this database, then:

    • the write server uploads the database to backup storage.
    • servers that are configured to restore databases automatically can download the DB from backup storage, and use it to serve queries from clients.
note

There are currently no backup backends implemented for open-source Glean.

- + \ No newline at end of file diff --git a/docs/derived/index.html b/docs/derived/index.html index b360d1e8f..e655c4598 100644 --- a/docs/derived/index.html +++ b/docs/derived/index.html @@ -5,7 +5,7 @@ Derived Predicates | Glean - + @@ -39,7 +39,7 @@ describes the data in the rest of the stack.

If you need to test changes to an existing predicate, copy the predicate and give it a new name to test it, and then fold the changes back into the original when you've finished testing.

Now, you can derive your new predicate:

glean derive --db-root ~/local/gleandb --db stacked/0 my.new.Predicate

and inspect the results in the shell:

glean shell --db-root ~/local/gleandb --db stacked/0
- + \ No newline at end of file diff --git a/docs/implementation/incrementality/index.html b/docs/implementation/incrementality/index.html index 854b8b60a..6c2c9b4f2 100644 --- a/docs/implementation/incrementality/index.html +++ b/docs/implementation/incrementality/index.html @@ -5,7 +5,7 @@ Incrementality | Glean - + @@ -97,7 +97,7 @@ ownership of the derived facts. Incremental derivation must therefore consider facts that have new ownership in the stacked DB when deriving. At the time of writing, this isn't implemented yet.

- + \ No newline at end of file diff --git a/docs/indexer/cxx/index.html b/docs/indexer/cxx/index.html index d2485303e..b499c1091 100644 --- a/docs/indexer/cxx/index.html +++ b/docs/indexer/cxx/index.html @@ -5,7 +5,7 @@ C++ and C | Glean - + @@ -28,7 +28,7 @@ PATH variable for this to succeed, or in the build tree.

Schema​

The schema is in glean/schema/source/cxx.angle

The schema is quite rich and captures C++, C, Objective-C and C pre-processor symbols, the semantic structure of C++ symbols, and is precise enough to do automated analysis of C++ code.

- + \ No newline at end of file diff --git a/docs/indexer/flow/index.html b/docs/indexer/flow/index.html index 54eaabd5b..7ead6f8b8 100644 --- a/docs/indexer/flow/index.html +++ b/docs/indexer/flow/index.html @@ -5,7 +5,7 @@ JavaScript (Flow) | Glean - + @@ -16,7 +16,7 @@ in the Glean demo Docker image to try out.

Run the indexer​

The indexer is run via the main glean CLI tool.

> cabal build exe:glean

And index your Flow repository with:

glean index flow DIR --db NAME/INSTANCE

where

  • DIR is the root directory containing the Flow project (with .flowconfig)
  • NAME/INSTANCE is the name and instance of the database to create

Provide the usual --db-root and --schema or --service arguments to glean

Run the indexer (manually)​

flow glean DIR --output-dir JSON --write-root PREFIX

where

  • DIR is the root directory containing the JavaScript/Flow files
  • JSON is the directory in which to write the output .json files
  • PREFIX is a prefix to add to the files in the Glean index (this can be empty if you don't need a prefix)

The generated files can be ingested into a Glean database using glean create.

Derived predicates​

Several predicates should be derived after indexing. For each stored predicate in the schema you should glean derive the predicate.

In the shell​

Flow source can also be indexed directly from the Glean shell:

:index flow DIR

Schema​

The schema is in glean/schema/source/flow.angle

- + \ No newline at end of file diff --git a/docs/indexer/hack/index.html b/docs/indexer/hack/index.html index 84fc552ff..de872c6d3 100644 --- a/docs/indexer/hack/index.html +++ b/docs/indexer/hack/index.html @@ -5,7 +5,7 @@ Hack | Glean - + @@ -13,7 +13,7 @@

Hack

The Hack indexer is built into the Hack typechecker. Stable and nightly binaries of the Hack indexer are available.

Run the indexer​

The indexer is run via the main glean CLI tool.

> cabal build exe:glean

And index your Hack repository with:

glean index hack DIR --db NAME/INSTANCE

where

  • DIR is the root directory containing the Hack project (with .hhconfig)
  • NAME/INSTANCE is the name and instance of the database to create

Provide the usual --db-root and --schema or --service arguments to glean

In the shell​

Hack source can also be indexed directly from the Glean shell:

:index hack DIR

Run the indexer (manually)​

hh_server DIR --write-symbol-info JSON \
--config symbol_write_include_hhi=false \
--config symbolindex_search_provider=NoIndex \
--config lazy_decl=true \
--config lazy_parse=true \
--config lazy_init2=true

where

  • DIR is the root directory containing the .php files
  • JSON is the directory in which to write the output .json files
  • We need several config flags to instantiate hh_server for indexing

The generated files can be ingested into a Glean database using glean create.

Derived predicates​

Several predicates should be derived after indexing. For each stored predicate in the schema you should glean derive the predicate.

Schema​

The schema is in glean/schema/source/hack.angle

- + \ No newline at end of file diff --git a/docs/indexer/haskell/index.html b/docs/indexer/haskell/index.html index 087cab663..a2ea9c204 100644 --- a/docs/indexer/haskell/index.html +++ b/docs/indexer/haskell/index.html @@ -5,14 +5,14 @@ Haskell | Glean - +

Haskell

To index Haskell, Glean consumes .hie files produced by the GHC compiler (>=8.8) with the flag -fwrite-ide-info.

Run the indexer

The indexer is run via the main glean CLI tool.

BUILD --ghc-options=-fwrite-ide-info
glean --db-root DBDIR index haskell ROOT --db NAME/INSTANCE

where

  • BUILD is a build command such that GHC is called with -fwrite-ide-info
  • DBDIR is the directory where the Glean db will be created
  • ROOT is the root directory containing the build artifacts generated with the -fwrite-ide-info flag (e.g. dist if a Cabal project)
  • NAME/INSTANCE is the name and instance of the database to create

Schema​

The schema is in

- + \ No newline at end of file diff --git a/docs/indexer/intro/index.html b/docs/indexer/intro/index.html index 9c78a0ebb..bddfa2ced 100644 --- a/docs/indexer/intro/index.html +++ b/docs/indexer/intro/index.html @@ -5,7 +5,7 @@ Introduction | Glean - + @@ -15,7 +15,7 @@ Glean, and how to use them. Indexers are programs that analyze source code to produce facts for Glean to store. They may be standalone programs, or part of existing IDE or language tools.

- + \ No newline at end of file diff --git a/docs/indexer/lsif-go/index.html b/docs/indexer/lsif-go/index.html index f5e65956f..5a6e82650 100644 --- a/docs/indexer/lsif-go/index.html +++ b/docs/indexer/lsif-go/index.html @@ -5,7 +5,7 @@ Go | Glean - + @@ -13,7 +13,7 @@

Go

To index Go we use SourceGraph's LSIF indexer for Go. LSIF is a new format for tools to share information about code. Binary releases of lsif-go are available for x86 Linux which will work as Glean indexers. The LSIF indexer uses a recent (>1.15) version of Go.

Run the indexer​

The indexer is run via the main glean CLI tool.

> cabal build exe:glean

And index your Go repository with:

glean index go DIR --db NAME/INSTANCE

where

  • DIR is the root directory containing the Go project
  • NAME/INSTANCE is the name and instance of the database to create

Provide the usual --db-root and --schema or --service arguments to glean

In the shell​

Go source can also be indexed directly from the Glean shell:

:index go DIR

The shell will pick a DB name and hash for you based on DIR.

Schema​

The schema is in glean/schema/source/lsif.angle

- + \ No newline at end of file diff --git a/docs/indexer/lsif-java/index.html b/docs/indexer/lsif-java/index.html index c0d4da755..b6571dc44 100644 --- a/docs/indexer/lsif-java/index.html +++ b/docs/indexer/lsif-java/index.html @@ -5,7 +5,7 @@ Java | Glean - + @@ -22,7 +22,7 @@ to glean

In the shell​

Java source can also be indexed directly from the Glean shell:

:index java-lsif DIR

The shell will pick a DB name and hash for you based on DIR. You can also run lsif-java offline, and then :load the resulting lsif file into the shell.

Schema​

The schema is in glean/schema/source/lsif.angle

- + \ No newline at end of file diff --git a/docs/indexer/lsif-rust/index.html b/docs/indexer/lsif-rust/index.html index c985636b1..b5810d648 100644 --- a/docs/indexer/lsif-rust/index.html +++ b/docs/indexer/lsif-rust/index.html @@ -5,7 +5,7 @@ Rust | Glean - + @@ -13,7 +13,7 @@

Rust

To index Rust we use rust-analyzer in LSIF mode. Pre-built binaries of rust-analyzer can be used as indexers that emit LSIF from Rust source.

Run the indexer​

The indexer is run via the main glean CLI tool.

> cabal build exe:glean

And index your Rust repository with:

glean index rust-lsif DIR --db NAME/INSTANCE

where

  • DIR is the root directory containing the Rust project
  • NAME/INSTANCE is the name and instance of the database to create

Provide the usual --db-root and --schema or --service arguments to glean

In the shell​

Rust source can also be indexed directly from the Glean shell:

:index rust-lsif DIR

The shell will pick a DB name and hash for you based on DIR.

Schema​

The schema is in glean/schema/source/lsif.angle

- + \ No newline at end of file diff --git a/docs/indexer/lsif-typescript/index.html b/docs/indexer/lsif-typescript/index.html index f3821423e..221e9576e 100644 --- a/docs/indexer/lsif-typescript/index.html +++ b/docs/indexer/lsif-typescript/index.html @@ -5,7 +5,7 @@ TypeScript | Glean - + @@ -13,7 +13,7 @@

TypeScript

To index TypeScript we use SourceGraph's LSIF indexer for TypeScript. LSIF is a new format for tools to share information about code. Releases of lsif-tsc can be installed with yarn or npm and used as indexers for LSIF, which Glean will accept. The indexer itself requires a node.js runtime.

Run the indexer​

The indexer is run via the main glean CLI tool.

> cabal build exe:glean

And index your TypeScript repository with:

glean index typescript DIR --db NAME/INSTANCE

where

  • DIR is the root directory containing the TypeScript project
  • NAME/INSTANCE is the name and instance of the database to create

Provide the usual --db-root and --schema or --service arguments to glean

To index very large TypeScript repositories, it may be necessary to use more heap memory in node.js (or break up the targets into subdirectories). Setting export NODE_OPTIONS="--max-old-space-size=8192" in the environment in which the indexer runs may help.

In the shell​

TypeScript source can also be indexed directly from the Glean shell:

:index typescript DIR

The shell will pick a DB name and hash for you based on DIR.

Schema​

The schema is in glean/schema/source/lsif.angle

- + \ No newline at end of file diff --git a/docs/indexer/scip-dotnet/index.html b/docs/indexer/scip-dotnet/index.html index 319713f14..8feab840a 100644 --- a/docs/indexer/scip-dotnet/index.html +++ b/docs/indexer/scip-dotnet/index.html @@ -5,7 +5,7 @@ Dotnet | Glean - + @@ -13,7 +13,7 @@

Dotnet

To index Dotnet we use SourceGraph's SCIP indexer for dotnet. SCIP is a new format for tools to share information about code. Releases of scip-dotnet can be installed with dotnet tools and used as indexers for SCIP, which Glean will accept. The indexer itself requires a dotnet runtime environment.

Run the indexer​

The indexer is run via the main glean CLI tool.

> cabal build exe:glean

And index your Dotnet repository with:

glean index dotnet-scip DIR --db NAME/INSTANCE

where

  • DIR is the root directory containing the Dotnet project
  • NAME/INSTANCE is the name and instance of the database to create

Provide the usual --db-root and --schema or --service arguments to glean

In the shell​

Dotnet source can also be indexed directly from the Glean shell:

:index dotnet-scip DIR

The shell will pick a DB name and hash for you based on DIR.

Schema​

The schema is in glean/schema/source/scip.angle

- + \ No newline at end of file diff --git a/docs/indexer/scip-python/index.html b/docs/indexer/scip-python/index.html index 64602fefb..40bf443f7 100644 --- a/docs/indexer/scip-python/index.html +++ b/docs/indexer/scip-python/index.html @@ -5,7 +5,7 @@ Python | Glean - + @@ -13,7 +13,7 @@

Python

To index Python we use SourceGraph's SCIP indexer for python. SCIP is a new format for tools to share information about code. Releases of scip-python can be installed with yarn or npm and used as indexers for SCIP, which Glean will accept. The indexer itself requires a python runtime.

Run the indexer​

The indexer is run via the main glean CLI tool.

> cabal build exe:glean

And index your Python repository with:

glean index python-scip DIR --db NAME/INSTANCE

where

  • DIR is the root directory containing the Python project
  • NAME/INSTANCE is the name and instance of the database to create

Provide the usual --db-root and --schema or --service arguments to glean

In the shell​

Python source can also be indexed directly from the Glean shell:

:index python-scip DIR

The shell will pick a DB name and hash for you based on DIR.

Schema​

The schema is in glean/schema/source/scip.angle

- + \ No newline at end of file diff --git a/docs/introduction/index.html b/docs/introduction/index.html index 2da8086d8..1cce32f55 100644 --- a/docs/introduction/index.html +++ b/docs/introduction/index.html @@ -5,7 +5,7 @@ Introduction | Glean - + @@ -51,7 +51,7 @@ want to support. Usually that means things like the locations of definitions and cross-references, but not expressions.↩
  • If you're familiar with Datalog, it's worth noting that currently Angle is limited to non-recursive queries only.↩
  • - + \ No newline at end of file diff --git a/docs/query/api/haskell/index.html b/docs/query/api/haskell/index.html index 8f97a367d..0e5d056e9 100644 --- a/docs/query/api/haskell/index.html +++ b/docs/query/api/haskell/index.html @@ -5,7 +5,7 @@ Haskell Query API | Glean - + @@ -23,7 +23,7 @@ request to Glean. This makes it efficient to do shallow queries and then selectively traverse and expand the results as needed.

    To use the API, import Glean.Haxl. The implementation of the API is in glean/haxl/Haxl/DataSource/Glean.hs.

    - + \ No newline at end of file diff --git a/docs/query/haskell/index.html b/docs/query/haskell/index.html index 84352e2fd..3e518cce8 100644 --- a/docs/query/haskell/index.html +++ b/docs/query/haskell/index.html @@ -5,14 +5,14 @@ Haskell Query API | Glean - +
    - + \ No newline at end of file diff --git a/docs/query/intro/index.html b/docs/query/intro/index.html index 968c4c54e..8275e76a0 100644 --- a/docs/query/intro/index.html +++ b/docs/query/intro/index.html @@ -5,7 +5,7 @@ Querying Glean | Glean - + @@ -27,7 +27,7 @@ that you can install in VS Code by following the instructions in the next section.

    Installing​

    code --install-extension path/to/glean-x.y.z.vsix

    The VS Code documentation describes alternative ways to install an extension from a .vsix file, from within the editor, in case the above command does not work or a more graphical, user-friendly approach is preferable.

    - + \ No newline at end of file diff --git a/docs/running/index.html b/docs/running/index.html index b601a73af..e274b0eb3 100644 --- a/docs/running/index.html +++ b/docs/running/index.html @@ -5,7 +5,7 @@ Running the Tools | Glean - + @@ -52,7 +52,7 @@ created, so it is likely to be a correct description of the data in the database.

  • --db-mock-writes
    Allow write operations, but discard the data and don't write it to the DB.

  • - + \ No newline at end of file diff --git a/docs/schema/all/index.html b/docs/schema/all/index.html index dda5c92d5..14d4d5fc1 100644 --- a/docs/schema/all/index.html +++ b/docs/schema/all/index.html @@ -5,7 +5,7 @@ The special "all" schema | Glean - + @@ -26,7 +26,7 @@ all separately, and clients can select at build time which version they want to use. This enables incremental migration of code from one schema to another schema.

    - + \ No newline at end of file diff --git a/docs/schema/basic/index.html b/docs/schema/basic/index.html index fb70d289c..3f688a0f0 100644 --- a/docs/schema/basic/index.html +++ b/docs/schema/basic/index.html @@ -5,7 +5,7 @@ Basic Concepts | Glean - + @@ -23,7 +23,7 @@ patterns that match multiple keys, and get back all the facts that match the pattern. More about this when we talk about Angle queries.

    - + \ No newline at end of file diff --git a/docs/schema/changing/index.html b/docs/schema/changing/index.html index f00dc382d..78b15b1cd 100644 --- a/docs/schema/changing/index.html +++ b/docs/schema/changing/index.html @@ -5,7 +5,7 @@ How do I change a schema? | Glean - + @@ -52,7 +52,7 @@ which can be useful if you want to perform schema changes in a more explicit way, or to rename schemas.

    The feature is enabled using a top-level directive

    schema my_schema.2 evolves my_schema.1

    This declaration has the effect of treating queries for my_schema.1 predicates as if they were for my_schema.2. That is, the query results will be retrieved from the database in the shape of a my_schema.2 fact and transformed into a fact of the equivalent my_schema.1 predicate specified in the query.

    The new schema must contain all the predicates of the old schema, either with new versions or old versions, and their definitions must be backwards compatible. We can achieve this by copying the entire content of the old schema into the new one and modifying it there.

    Now what should Glean do when a client asks for a fact from an old schema?

    • Answer with db facts from the old schema
    • Answer with db facts from the new schema transformed into the old ones.

    If there are no facts of the old schema in the database, we will take option 2. If the database has any facts at all of the old schema, we choose option 1.

    That is, schema evolutions only take effect if there are no facts of the old schema in the database; it is ignored otherwise.

    As an example suppose we start with the following schemas:

    schema src.1 {
    predicate File {
    path : string
    }
    }

    schema os.1 {
    import src.1

    predicate Permissions {
    file : File,
    permissions : nat
    }
    }

    schema info.1 {
    import src.1

    predicate IsTemporary {
    file : File
    } F where F = src.File { path = "/tmp".. }
    }

    Now we want to make a backward-compatible change to src.File and add an extension field. We could add this to the file:

    schema src.2 {
    predicate File {
    path : string,
    extension : string
    }
    }

    schema src.2 evolves src.1

    Now if the indexer is still producing only src.1 facts, all other predicates will work as before and queries for src.File.2 will return no results.

    Once the indexer is changed to produce only src.2 facts, queries like src.File.1 _ will be fulfilled using data from the src.2 schema, converting the src.File.2 results to the shape of src.File.1 before returning to the client.

    This is also the case in the derivation query of info.IsTemporary. Although info imports src.1, the query will be transformed to use src.2 facts.

    On the other hand, os.Permissions will be empty. This must be the case because its first field references a src.File.1 fact, of which there is none in the database. For this predicate to continue being available we must evolve its schema as well.

    schema os.2 {             # changed
    import src.2 # changed

    predicate Permissions {
    file : File,
    permissions : nat
    }
    }

    schema os.2 evolves os.1 # changed
    - + \ No newline at end of file diff --git a/docs/schema/design/index.html b/docs/schema/design/index.html index 5cbb32f70..37384c2d6 100644 --- a/docs/schema/design/index.html +++ b/docs/schema/design/index.html @@ -5,7 +5,7 @@ Schema Design | Glean - + @@ -52,7 +52,7 @@ example of this was described in What is the difference between a predicate and a type?.

    How to experiment with schema design​

    • Generate some data and see how large it is, using :stat in the shell.

    • Write some example queries against your data, and check how much searching they do using :profile in the shell (see Query Debugging).

    - + \ No newline at end of file diff --git a/docs/schema/recursion/index.html b/docs/schema/recursion/index.html index 99bd17b83..5cba9d129 100644 --- a/docs/schema/recursion/index.html +++ b/docs/schema/recursion/index.html @@ -5,7 +5,7 @@ Recursion | Glean - + @@ -23,7 +23,7 @@ keys would make this process significantly harder.

    Facts can be recursive in their values, but not their keys. A mutually recursive set of facts must be added to the database in a single batch, however.

    To summarise, recursion is

    • allowed between predicates
    • not allowed between keys
    • allowed between values
    - + \ No newline at end of file diff --git a/docs/schema/syntax/index.html b/docs/schema/syntax/index.html index 8c923430c..4a2089c7c 100644 --- a/docs/schema/syntax/index.html +++ b/docs/schema/syntax/index.html @@ -5,7 +5,7 @@ Syntax | Glean - + @@ -44,7 +44,7 @@ future. The process for safely changing schemas is described in Changing the Schema.

    schema example.2 : example.1 {
    predicate Class :
    {
    # new definition of Class
    }
    }

    Inheritance is useful for making changes to a schema by creating a new schema version:

    • Inheriting from a schema brings into scope all the types and predicates of that schema, both qualified and unqualified.
    • The new schema also exports all the types and predicates defined in the schemas it inherits from, except those that are re-defined.

    Specifically, in the above example:

    • We can import example.2 anywhere and get all the predicates defined in example.1, except that we'll get the new Class defined in example.2.
    • We can still import example.1 and get the old version of the schema.

    Note that if you have predicates that depend on a predicate that was revised in this way, you must also copy those predicates to the new schema, because the existing predicates will refer to the old version of the predicate you revised. (In due course Glean will probably provide a convenient way to do this; in the meantime you have to copy & paste. Not a big deal because you'll usually delete the old one at some point, and you can't modify it anyway.)

    Named schemas cannot form cycles through their import or inheritance declarations.

    Naming rules and conventions​

    Names take the form of a dot-separated sequence of alphanumeric words. For example, sys.Blob, clang.File, or cxx.objc.Name. The words up to the last dot are the namespace, the final word is the name.

    See Names for full details.

    Briefly:

    • Namespaces (schema names) are dot-separated sequences of identifiers each beginning with a lower-case letter
    • Names and namespaces can contain only alphanumeric characters, '_', or '.' (namespaces only)
    • There is a set of reserved words that can't be used for names, e.g. class. Syncing the schema will fail with an error if you use a reserved word.
    - + \ No newline at end of file diff --git a/docs/schema/thrift/index.html b/docs/schema/thrift/index.html index 182de987f..5682d46e8 100644 --- a/docs/schema/thrift/index.html +++ b/docs/schema/thrift/index.html @@ -5,7 +5,7 @@ Thrift and JSON | Glean - + @@ -20,7 +20,7 @@ shell, the results are printed as JSON-encoded Thrift; when you write data to Glean it can be in the form of JSON-encoded Thrift.

    The relationship between schema types and Thrift/JSON is given by the following table:

    Schema type | Thrift type | JSON
    nat | Nat (i64) | 123
    byte | Byte (i8) | 123
    string | string | "abc"
    bool | bool | true or false
    [byte] | binary | base-64 encoded string *1
    [T] | list<T> | [...]
    {
      f₁ : T₁,
      ...,
      fₙ : Tₙ
    }
    struct Foo {
      1: T₁ f₁;
      ...
      n: Tₙ fₙ;
    }
    {
      "f₁" : q₁,
      ...
      "fₙ" : qₙ
    }
    {
      f₁ : T₁ |
      ... |
      fₙ : Tₙ
    }
    union Foo {
      1: T₁ f₁;
      ...
      n: Tₙ fₙ;
    }
    { "f" : t }
    for one of the fields f₁..fₙ
    maybe T | In a record field:
    optional T f
    f : t
    if the value is present
    enum {
      L₁|
      ...|
      Lₙ
    }
    enum Foo {
      L₁ = 1,
      ...
      Lₙ = n
    }
    the index of the value,
    e.g. 12
    predicate P : K -> V | struct P {
      1: Id id
      2: optional K key
      3: optional V value
    }
    note*2
    refer to fact N:
    N or { "id": N }
    define a fact:
    { "id" : N,
       "key" : t } or
    { "key": t } or
    { "key": t,
        "value" : v }
    type N = T | depending on T:
    struct N { .. }
    union N {...}
    enum N {...}
    typedef T N;
    same as type T
    1. The Thrift encoding of a binary field in JSON is a base-64-encoded string. However, not all Thrift implementations respect this. At the time of writing, the Python Thrift implementation doesn't base-64-encode binary values. For this reason we provide an option in the Glean Thrift API to disable base-64 encoding for binary if your client doesn't support it. The Glean Shell also uses this option to make it easier to work with binary.

    2. The key is optional: a nested fact may be expanded in place or represented by a reference to the fact ID only. When querying Glean data the query specifies which nested facts should be expanded in the result, and when writing data to Glean using Thrift or JSON, we can optionally specify the value of nested facts inline.

    - + \ No newline at end of file diff --git a/docs/schema/types/index.html b/docs/schema/types/index.html index dfbf022cf..acdb844d0 100644 --- a/docs/schema/types/index.html +++ b/docs/schema/types/index.html @@ -5,14 +5,14 @@ Built-in Types | Glean - +

    Built-in Types

    Type — Meaning
    nat — 64-bit natural numbers
    byte — 8-bit natural numbers
    string — UTF-8 encoded strings
    [T] — lists of elements of type T
    { field₁ : T₁, ..., fieldₙ : Tₙ } — a record with zero or more named fields
    { field₁ : T₁ | ... | fieldₙ : Tₙ } — a sum (union) type with one or more named alternatives
    P — a reference to a fact of predicate P
    bool — the boolean type with values true and false
    maybe T — an optional value of type T
    enum { name₁ | ... | nameₙ } — exactly one of the symbols name₁..nameₙ
    - + \ No newline at end of file diff --git a/docs/schema/workflow/index.html b/docs/schema/workflow/index.html index 307e92261..135cde5be 100644 --- a/docs/schema/workflow/index.html +++ b/docs/schema/workflow/index.html @@ -5,7 +5,7 @@ Workflow | Glean - + @@ -15,7 +15,7 @@ glean/schema/thrift, which are then processed into Haskell code by

    make thrift-schema-hs

    and finally built by

    make glean

    Examples of code using these types:

    Experimenting with schemas​

    1. Modify the source files in glean/schema/source

    2. Start up the shell locally using your schema:
      glean shell --db-root ~/local/gleandb --schema glean/schema/source
      If you don't already have a ~/local/gleandb for storing local DBs, create it with mkdir ~/local/gleandb.

    3. Test it with some example data: see Loading a DB from JSON in the shell.

    4. Iterate as necessary, using :reload in the shell to reload the schema.

    - + \ No newline at end of file diff --git a/docs/server/index.html b/docs/server/index.html index 40632e26a..dcc87ee6a 100644 --- a/docs/server/index.html +++ b/docs/server/index.html @@ -5,7 +5,7 @@ Running the Glean Server | Glean - + @@ -17,7 +17,7 @@ Port number to listen on.

    The server watches for changes in any configuration files specified with config:PATH, including the schema.

    - + \ No newline at end of file diff --git a/docs/shell/index.html b/docs/shell/index.html index 12dafd55a..40665a27d 100644 --- a/docs/shell/index.html +++ b/docs/shell/index.html @@ -5,7 +5,7 @@ Using the Shell | Glean - + @@ -55,7 +55,7 @@ test your changes.
  • :statistics [PREDICATE]
    Show statistics for the current database.
  • :quit
    Leave the shell.
  • - + \ No newline at end of file diff --git a/docs/trying/index.html b/docs/trying/index.html index 7b8f32973..4f712d3f9 100644 --- a/docs/trying/index.html +++ b/docs/trying/index.html @@ -5,7 +5,7 @@ Trying Glean | Glean - + @@ -28,7 +28,7 @@ (http://localhost:8888/packages/react-dom/src/client/ReactDOMComponent.js) - note how Glean is accurately linking both local and imported symbols.

    - + \ No newline at end of file diff --git a/docs/walkthrough/index.html b/docs/walkthrough/index.html index dbd36ff93..0022dd119 100644 --- a/docs/walkthrough/index.html +++ b/docs/walkthrough/index.html @@ -5,7 +5,7 @@ Walkthrough | Glean - + @@ -22,7 +22,7 @@ in /tmp/glean/facts.glean. Then reload schema and create a database from the example data using :reload and :load <file> in the shell:

    > :reload
    reloading schema [2 schemas, 7 predicates]
    > :load /tmp/glean/facts.glean
    facts>

    Now head over to Angle Guide to try some example queries and learn about how the query language works.

    - + \ No newline at end of file diff --git a/docs/write/index.html b/docs/write/index.html index 20618c4b2..2bbcf4b5f 100644 --- a/docs/write/index.html +++ b/docs/write/index.html @@ -5,7 +5,7 @@ Writing data to Glean | Glean - + @@ -21,8 +21,8 @@ method. This creates a work queue of tasks on the server.

  • Clients obtain tasks from the server by calling getWork. Tasks may have dependencies between them, so the server won't hand out a task until its dependencies are complete.

  • When all tasks are done, the server marks the database as complete.

  • APIs for writing​

    If none of the above work for you, the Thrift API enables basic write -access to the database.

    • kickOff can be used to create a new DB
    • sendJsonBatch is for sending facts in JSON-serialized form
    • finishBatch exposes the result of a previously sent JSON batch
    • workFinished closes a DB

    A rough outline of a client looks like:

    glean = make_glean_thrift_client()
    db_handle = make_uuid()
    glean.kickOff(my_repo, KickOffFill(writeHandle=db_handle))
    for json_batch in json_batches:
    handle = glean.sendJsonBatch(json_batch)
    result = glean.finishBatch(handle)
    # handle result
    glean.workFinished(my_repo, db_handle, success_or_failure)

    Writing from the command line​

    JSON format​

    The JSON format for Glean data is described in Thrift and JSON.

    Here's an example of JSON data for writing to Glean:

    [
    { "predicate": "cxx1.Name.1", # define facts for cxx1.Name.1
    "facts": [
    { "id": 1, "key": "abc" }, # define a fact with id 1
    { "id": 2, "key": "def" }
    ]
    },
    { "predicate": "cxx1.FunctionName.1", # define facts for cxx1.FunctionName.1
    "facts": [
    { "id": 3,
    "key": {
    "name": { "id": 1 }}} # reference to fact with id 1
    ]
    },
    { "predicate": "cxx1.FunctionQName.1", # define facts for cxx1.FunctionQName.1
    "facts": [
    { "key": {
    "name": 3, # 3 is shorthand for { "id": 3 }
    "scope": { "global_": {} } } },
    { "key": {
    "name": {
    "key": { # define a nested fact directly
    "name": {
    "key": "ghi" }}}, # another nested fact
    "scope": {
    "namespace_": {
    "key": {
    "name": {
    "key": "std" }}}}}
    ]
    }
    ]

    The rules of the game are:

    • Predicate names must include versions, i.e. cxx1.Name.1 rather than cxx1.Name.
    • The id field when defining a fact is optional. The id numbers in the input file will not be the final id numbers assigned to the facts in the database.
    • There are no restrictions on id values (any 64-bit integer will do) but an id value may not be reused within a file.
    • Later facts may refer to earlier ones using either { "id": N } or just N.
    • It is only possible to refer to ids from facts in the same file, if you are writing multiple files using glean write or via the sendJsonBatch API.
    • A nested fact can be defined inline, instead of defining it with an id first and then referencing it.
    • An inline nested fact can be given an id and referred to later.

    Loading a DB from JSON in the shell​

    The shell is useful for experimenting with creating a DB from JSON data directly. Let's try loading the data above into a DB in the shell:

    $ mkdir /tmp/glean
    $ glean shell --db-root /tmp/glean
    Glean Shell, dev mode
    type :help for help.
    no fbsource database availabe
    > :load test/0 /home/smarlow/test
    I0514 01:19:37.137109 3566745 Work.hs:184] test/16: database complete

    Let's see what facts we loaded:

    test> :stat
    1
    count: 72
    size: 5988
    cxx1.FunctionName.1
    count: 2
    size: 66
    cxx1.FunctionQName.1
    count: 2
    size: 70
    cxx1.Name.1
    count: 4
    size: 148
    cxx1.NamespaceQName.1
    count: 1
    size: 35
    test>

    Note that there were 4 cxx1.Name.1 facts - some of those were defined as inline nested facts in the JSON. We can query them all:

    test> cxx1.Name _
    4 results, 1 queries, 4 facts, 0.22ms, 44296 bytes

    { "id": 1096, "key": "abc" }
    { "id": 1097, "key": "def" }
    { "id": 1100, "key": "ghi" }
    { "id": 1102, "key": "std" }

    Note that the id values here do not correspond to the id values in the input file.

    Creating a database using the command line​

    The glean command-line tool can be used to create a database directly on the server.

    To create a database from a single file of JSON facts:

    glean create --service <write-server> --finish --db <name>/<hash> <filename>

    where

    • <write-server> is the host:port of the Glean server
    • <name> is the name for your DB. For indexing repositories we normally use the name of the repository, but it's just a string, so you can use whatever you want.
    • <hash> identifies this particular instance of your database. For repositories we normally use the revision hash, but, again, it's just a string.
    • <filename> is the file containing the JSON facts.

    If the file is more than, say, 100MB, this operation will probably time out sending the data to the server. To send large amounts of data you need to batch it up into multiple files, and then send it like this:

    glean create --service <write-server> --db <name>/<hash>
    glean write --service <write-server> --db <name>/<hash> <filename1>
    glean write --service <write-server> --db <name>/<hash> <filename2>
    ...
    glean finish --service <write-server> --db <name>/<hash>

    To find out if your DB made it:

    glean shell --service <write-server> :list

    This will list the DBs available on the write server.

    - +access to the database.

    • kickOff can be used to create a new DB
    • sendJsonBatch is for sending facts in JSON-serialized form
    • finishBatch exposes the result of a previously sent JSON batch
    • workFinished closes a DB

    A rough outline of a client looks like:

    glean = make_glean_thrift_client()
    db_handle = make_uuid()
    glean.kickOff(my_repo, KickOffFill(writeHandle=db_handle))
    for json_batch in json_batches:
    handle = glean.sendJsonBatch(json_batch)
    result = glean.finishBatch(handle)
    # handle result
    glean.workFinished(my_repo, db_handle, success_or_failure)

    Writing from the command line​

    JSON format​

    The JSON format for Glean data is described in Thrift and JSON.

    Here's an example of JSON data for writing to Glean:

    [
    { "predicate": "cxx1.Name.1", # define facts for cxx1.Name.1
    "facts": [
    { "id": 1, "key": "abc" }, # define a fact with id 1
    { "id": 2, "key": "def" }
    ]
    },
    { "predicate": "cxx1.FunctionName.1", # define facts for cxx1.FunctionName.1
    "facts": [
    { "id": 3,
    "key": {
    "name": { "id": 1 }}} # reference to fact with id 1
    ]
    },
    { "predicate": "cxx1.FunctionQName.1", # define facts for cxx1.FunctionQName.1
    "facts": [
    { "key": {
    "name": 3, # 3 is shorthand for { "id": 3 }
    "scope": { "global_": {} } } },
    { "key": {
    "name": {
    "key": { # define a nested fact directly
    "name": {
    "key": "ghi" }}}, # another nested fact
    "scope": {
    "namespace_": {
    "key": {
    "name": {
    "key": "std" }}}}}
    ]
    }
    ]

    The rules of the game are:

    • Predicate names must include versions, i.e. cxx1.Name.1 rather than cxx1.Name.
    • The id field when defining a fact is optional. The id numbers in the input file will not be the final id numbers assigned to the facts in the database.
    • There are no restrictions on id values (any 64-bit integer will do) but an id value may not be reused within a file.
    • Later facts may refer to earlier ones using either { "id": N } or just N.
    • It is only possible to refer to ids from facts in the same file, if you are writing multiple files using glean write or via the sendJsonBatch API.
    • A nested fact can be defined inline, instead of defining it with an id first and then referencing it.
    • An inline nested fact can be given an id and referred to later.

    Loading a DB from JSON in the shell​

    The shell is useful for experimenting with creating a DB from JSON data directly. Let's try loading the data above into a DB in the shell:

    $ mkdir /tmp/glean
    $ glean shell --db-root /tmp/glean
    Glean Shell, dev mode
    type :help for help.
    no fbsource database availabe
    > :load test/0 /home/smarlow/test
    I0514 01:19:37.137109 3566745 Work.hs:184] test/16: database complete

    Let's see what facts we loaded:

    test> :stat
    1
    count: 72
    size: 5988
    cxx1.FunctionName.1
    count: 2
    size: 66
    cxx1.FunctionQName.1
    count: 2
    size: 70
    cxx1.Name.1
    count: 4
    size: 148
    cxx1.NamespaceQName.1
    count: 1
    size: 35
    test>

    Note that there were 4 cxx1.Name.1 facts - some of those were defined as inline nested facts in the JSON. We can query them all:

    test> cxx1.Name _
    4 results, 1 queries, 4 facts, 0.22ms, 44296 bytes

    { "id": 1096, "key": "abc" }
    { "id": 1097, "key": "def" }
    { "id": 1100, "key": "ghi" }
    { "id": 1102, "key": "std" }

    Note that the id values here do not correspond to the id values in the input file.

    Creating a database using the command line​

    The glean command-line tool can be used to create a database directly on the server.

    To create a database from a single file of JSON facts:

    glean create --service <write-server> --finish --db <name>/<hash> <filename>

    where

    • <write-server> is the host:port of the Glean server
    • <name> is the name for your DB. For indexing repositories we normally use the name of the repository, but it's just a string, so you can use whatever you want.
    • <hash> identifies this particular instance of your database. For repositories we normally use the revision hash, but, again, it's just a string.
    • <filename> is the file containing the JSON facts.

    If the file is more than, say, 100MB, this operation will probably time out sending the data to the server. To send large amounts of data you need to batch it up into multiple files, and then send it like this:

    glean create --service <write-server> --db <name>/<hash>
    glean write --service <write-server> --db <name>/<hash> <filename1>
    glean write --service <write-server> --db <name>/<hash> <filename2>
    ...
    glean finish --service <write-server> --db <name>/<hash>

    To find out if your DB made it:

    glean shell --service <write-server> :list

    This will list the DBs available on the write server.

    + \ No newline at end of file diff --git a/index.html b/index.html index 9de7dc8a5..b0c16dafd 100644 --- a/index.html +++ b/index.html @@ -5,14 +5,14 @@ Glean | Glean - +
    Glean Logo

    Glean

    System for collecting, deriving and querying facts about source code

    Key Features

    Rich types

    Store detailed information about code

    Compact storage

    Store data about code at scale

    Efficient queries

    Build experiences with deep insights from code

    - + \ No newline at end of file