Skip to content

Commit

Permalink
Experiment: using capabilities for the semgrep codebase (semgrep#9350)
Browse files Browse the repository at this point in the history
See the talk I'll make to the PA/SR talk for the motivations and
explanations of this work.
  • Loading branch information
Yoann Padioleau authored Dec 5, 2023
1 parent 988818d commit 3dce938
Show file tree
Hide file tree
Showing 51 changed files with 392 additions and 297 deletions.
70 changes: 21 additions & 49 deletions TCB/Cap.ml
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ module FS = struct
type root_w = cap
type cwd_r = cap
type cwd_w = cap
type tmp_r = cap
type tmp_w = cap
type tmp = cap
end

(**************************************************************************)
Expand Down Expand Up @@ -159,68 +158,46 @@ end

type root = < root_r : FS.root_r ; root_w : FS.root_w >
type cwd = < cwd_r : FS.cwd_r ; cwd_w : FS.cwd_w >
type tmp = < tmp_r : FS.tmp_r ; tmp_w : FS.tmp_w >
type tmp = < tmp : FS.tmp >
type fs = < root ; cwd ; tmp >
type console = < stdin : Console.stdin ; stdout : Console.stdout >

type process_multi =
< fork : Process.fork ; domain : Process.domain ; thread : Process.thread >

type process_single = < signal : Process.signal ; exit : Process.exit >

type process =
< console
; process_single
; process_multi
; argv : Process.argv
; env : Process.env >
type stdin = < stdin : Console.stdin >
type stdout = < stdout : Console.stdout >
type console = < stdin ; stdout >
type fork = < fork : Process.fork >
type domain = < domain : Process.domain >
type thread = < thread : Process.thread >
type process_multi = < fork ; domain ; thread >
type signal = < signal : Process.signal >
type exit = < exit : Process.exit >
type process_single = < signal ; exit >
type argv = < argv : Process.argv >
type env = < env : Process.env >
type process = < console ; process_single ; process_multi ; argv ; env >
type exec = < exec : Exec.t >

(* TODO: extend *)
type network = < network : Network.t >
type misc = < time : Misc.time ; random : Misc.random >
type time = < time : Misc.time >
type random = < random : Misc.random >
type misc = < time ; random >

(* alt: called "Stdenv.Base.env" in EIO *)
type all_caps =
< process
; fs (* a mix of fs and process_multi as it requires both *)
; exec : Exec.t
; exec
; network
; misc >

(*
(* "subtypes" of powerbox *)
type no_network = {
process : process_powerbox;
fs : fs_powerbox;
exec : Exec.t;
}
type no_exec = { process : process_powerbox; fs : fs_powerbox }
type no_fs = { process : process_powerbox }
type no_concurrency = {
stdin : Console.stdin;
stdout : Console.stdout;
argv : Process.argv;
env : Process.env;
}
*)

type no_cap = unit (* better than [type no_cap = cap] :) *)

let powerbox : all_caps =
object
(*let fs_powerbox : fs_powerbox = object *)
method root_r = ()
method root_w = ()
method cwd_r = ()
method cwd_w = ()
method tmp_r = ()
method tmp_w = ()
(*end*)

(*let process_powerbox : process_powerbox = object *)
method tmp = ()
method stdin = ()
method stdout = ()
method argv = ()
Expand All @@ -230,13 +207,8 @@ let powerbox : all_caps =
method exit = ()
method domain = ()
method thread = ()
(* end *)

(* let misc_powerbox : misc_powerbox = object *)
method time = ()
method random = ()
(*end *)

method exec = ()
method network = ()
end
Expand Down
47 changes: 26 additions & 21 deletions TCB/Cap.mli
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
(* Capabilities implemented as simple abstract types and explicit
* parameters ("Lambda the ultimate security tool").
*
* Note that most of the types below are on purpose [abstract] and there is
* Note that most of the types below are on purpose abstract and there is
* no way to build/forge them except by calling the restricted (statically
* and dynamically) Cap.main() below which is passing all capabilities
* to the entry point of your program.
* to the entry point of your program. This entry point can then restrict
* the set of capabilities to pass to other functions by using the :> cast
* operator.
*)

(**************************************************************************)
Expand All @@ -22,7 +24,7 @@ module Process : sig
type argv
type env

(* advanced stuff *)
(* advanced stuff (TODO? pid? kill? split signal?) *)
type signal
type exit

Expand All @@ -34,12 +36,12 @@ end

(* read/write on root/cwd/tmp *)
module FS : sig
(* LATER: could restrict to root_but_no_proc_nor_sys_nor_etc *)
type root_r
type root_w
type cwd_r
type cwd_w
type tmp_r
type tmp_w
type tmp
end

module Exec : sig
Expand All @@ -66,31 +68,34 @@ end

type root = < root_r : FS.root_r ; root_w : FS.root_w >
type cwd = < cwd_r : FS.cwd_r ; cwd_w : FS.cwd_w >
type tmp = < tmp_r : FS.tmp_r ; tmp_w : FS.tmp_w >
type tmp = < tmp : FS.tmp >
type fs = < root ; cwd ; tmp >
type console = < stdin : Console.stdin ; stdout : Console.stdout >

type process_multi =
< fork : Process.fork ; domain : Process.domain ; thread : Process.thread >

type process_single = < signal : Process.signal ; exit : Process.exit >

type process =
< console
; process_single
; process_multi
; argv : Process.argv
; env : Process.env >
type stdin = < stdin : Console.stdin >
type stdout = < stdout : Console.stdout >
type console = < stdin ; stdout >
type fork = < fork : Process.fork >
type domain = < domain : Process.domain >
type thread = < thread : Process.thread >
type process_multi = < fork ; domain ; thread >
type signal = < signal : Process.signal >
type exit = < exit : Process.exit >
type process_single = < signal ; exit >
type argv = < argv : Process.argv >
type env = < env : Process.env >
type process = < console ; process_single ; process_multi ; argv ; env >
type exec = < exec : Exec.t >

(* TODO: extend *)
type network = < network : Network.t >
type misc = < time : Misc.time ; random : Misc.random >
type time = < time : Misc.time >
type random = < random : Misc.random >
type misc = < time ; random >

(* alt: called "Stdenv.Base.env" in EIO *)
type all_caps =
< process
; fs (* a mix of fs and process_multi as it requires both *)
; exec : Exec.t
; exec
; network
; misc >

Expand Down
3 changes: 2 additions & 1 deletion TCB/dune
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; Trusted Computing Base (see https://en.wikipedia.org/wiki/Trusted_computing_base)
; The OCaml Trusted Computing Base!
; see https://en.wikipedia.org/wiki/Trusted_computing_base for more info
(library
(public_name TCB)
(wrapped false)
Expand Down
8 changes: 4 additions & 4 deletions libs/commons/JSON.ml
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,14 @@ let rec ezjsonm_to_yojson (json : ezjsonm) : yojson =
| `Float f -> `Float f
| `Null -> `Null

let load_json file =
let y = Y.from_file file in
from_yojson y

let json_of_string str =
let y = Y.from_string str in
from_yojson y

let json_of_chan (chan : Chan.i) =
let y = Y.from_channel chan.ic in
from_yojson y

let string_of_json ?compact ?recursive ?allow_nan json =
ignore (compact, recursive, allow_nan);
let y = to_yojson json in
Expand Down
4 changes: 1 addition & 3 deletions libs/commons/JSON.mli
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,8 @@ val to_yojson : t -> yojson
val from_yojson : yojson -> t
val yojson_to_ezjsonm : yojson -> ezjsonm
val ezjsonm_to_yojson : ezjsonm -> yojson

(* TODO: use Fpath.t *)
val load_json : string (* filename *) -> t
val json_of_string : str -> t
val json_of_chan : Chan.i -> t

val string_of_json :
?compact:bool -> ?recursive:bool -> ?allow_nan:bool -> t -> str
Expand Down
9 changes: 7 additions & 2 deletions libs/git_wrapper/Git_wrapper.ml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ let logger = Logging.get_logger [ __MODULE__ ]
(* Types and constants *)
(*****************************************************************************)

(* TODO: could also do
* [type git_cap] abstract type and then
* let git_cap_of_exec _caps = unit
*)

type status = {
added : string list;
modified : string list;
Expand Down Expand Up @@ -136,7 +141,7 @@ let range_of_git_diff lines =
(* Entry points *)
(*****************************************************************************)

let git_check_output (args : Cmd.args) : string =
let git_check_output _caps (args : Cmd.args) : string =
let cmd : Cmd.t = (git, args) in
match UCmd.string_of_run ~trim:true cmd with
| Ok (str, (_, `Exited 0)) -> str
Expand Down Expand Up @@ -425,7 +430,7 @@ let git_log_json_format =
\"contributor\": {\"commit_author_name\": \"%an\", \"commit_author_email\": \
\"%ae\"}}"

let time_to_str (timestamp : Common2.float_time) : string =
let time_to_str (timestamp : float) : string =
let date = Unix.gmtime timestamp in
let year = date.tm_year + 1900 in
let month = date.tm_mon + 1 in
Expand Down
4 changes: 3 additions & 1 deletion libs/git_wrapper/Git_wrapper.mli
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
(* Small wrapper around the 'git' command-line program *)

(* TODO: make sub capability with cap_git_exec *)

exception Error of string

(* very general helper to run a git command and return its output
* if everthing went fine or log the error (using Logs) and
* raise an Error otherwise
*)
val git_check_output : Cmd.args -> string
val git_check_output : Cap.Exec.t -> Cmd.args -> string

(*
This is incomplete. Git offer a variety of filters and subfilters,
Expand Down
1 change: 1 addition & 0 deletions libs/git_wrapper/dune
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
(public_name git_wrapper)
(name git_wrapper)
(wrapped false)
; (flags (:standard -open TCB -open Commons_TCB))
(flags (:standard -open No_TCB))
(libraries
fpath
Expand Down
4 changes: 2 additions & 2 deletions src/engine/Eval_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ let metavar_of_json s = function
* so this format is not used anymore in semgrep-python, but we still
* use it for some of our regression tests in tests/eval/.
*)
let parse_json file =
let json = JSON.load_json file in
let parse_json (file : string) : env * code =
let json = UChan.with_open_in (Fpath.v file) JSON.json_of_chan in
match json with
| J.Object xs -> (
match Assoc.sort_by_key_lowfirst xs with
Expand Down
30 changes: 16 additions & 14 deletions src/main/Main.ml
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,19 @@
* and rename this binary to simply 'semgrep'.
*)
let () =
match Filename.basename Sys.argv.(0) with
(* osemgrep!! *)
| "osemgrep.bc"
| "osemgrep" ->
let exit_code = CLI.main Sys.argv in
(* remove? or make debug-only? or use Logs.info? *)
if exit_code <> Exit_code.ok then
Printf.eprintf "Error: %s\nExiting with error status %i: %s\n%!"
(Exit_code.to_message exit_code)
(Exit_code.to_int exit_code)
(String.concat " " (Array.to_list Sys.argv));
exit (Exit_code.to_int exit_code)
(* legacy semgrep-core *)
| _ -> Core_CLI.main Sys.argv
Cap.main (fun (caps : Cap.all_caps) ->
let argv = CapSys.argv caps#argv in
match Filename.basename argv.(0) with
(* osemgrep!! *)
| "osemgrep.bc"
| "osemgrep" ->
let exit_code = CLI.main caps argv in
(* remove? or make debug-only? or use Logs.info? *)
if exit_code <> Exit_code.ok then
Printf.eprintf "Error: %s\nExiting with error status %i: %s\n%!"
(Exit_code.to_message exit_code)
(Exit_code.to_int exit_code)
(String.concat " " (Array.to_list argv));
CapStdlib.exit caps#exit (Exit_code.to_int exit_code)
(* legacy semgrep-core *)
| _ -> Core_CLI.main argv)
10 changes: 7 additions & 3 deletions src/main/dune
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
(executables
(names Main)
(flags
(:include flags.sexp)
; (:standard -open TCB -open Commons_TCB)
)
(libraries
TCB
semgrep_core_cli
osemgrep_cli
lwt_platform.unix
)
(preprocess (pps ppx_profiling))
; for ocamldebug
(modes native byte)
(flags (:include flags.sexp))
; for ocamldebug
(modes native byte)
)


Expand Down
Loading

0 comments on commit 3dce938

Please sign in to comment.