Skip to content

Commit

Permalink
feat(php): Support case insensitive function calls and classes (semgr…
Browse files Browse the repository at this point in the history
…ep#8356)

## What
Adds matching support for languages that have case insensitive
identifiers and demonstrates their usage for Php.

closes semgrep#7231

## How
Adds a boolean field to `id_info` fields and updates
`Generic_vs_generic.ml` and `Matching_generic.ml` to respect these
fields. I originally thought it would be easier to add a special case
for Php in matching, but extending `Matching_generic.ml` to be language
specific becomes troublesome because `equal_ast_bound_code` is called
from outside this submodules in contexts that could possibly be
addressing variables for multiple languages (or at least that was my
take on the situation and types at play).

## Remaining Work To Do
This was shared as a draft to communicate my work and get feedback about
how to add the bitfield to id_info and update the submodule.
- [x] Modify `id_info` to contain a `id_flags` field that contains a
bitfield instead of having two separate boolean fields
(id_case_insensitive, and id_hidden).
- [x] Clean up code and document purpose.
- [x] Add change log entry.
- [x] Submit pull request for `semgrep-rules` submodule and update
submodule to point to main branch again. (Not actually sure the correct
order to do this without breaking things). [(ongoing
here)](semgrep/semgrep-rules#3013)

## Testing
Adds a few test cases matching against identifiers and metavariables in
a case insensitive fashion and updates `tests/semgrep-rules` that were
disabled due lack of support for this. Note, `tests/semgrep-rules` is
currently pointing to a branch that I need to open a pull request for.

PR checklist:

- [x] Purpose of the code is [evident to future
readers](https://semgrep.dev/docs/contributing/contributing-code/#explaining-code)
- [x] Tests included or PR comment includes a reproducible test plan
- [x] Documentation is up-to-date
- [x] A changelog entry was [added to
changelog.d](https://semgrep.dev/docs/contributing/contributing-code/#adding-a-changelog-entry)
for any user-facing change
- [x] Change has no security implications (otherwise, ping security
team)

If you're unsure about any of this, please see:

- [Contribution
guidelines](https://semgrep.dev/docs/contributing/contributing-code)!
- [One of the more specific guides located
here](https://semgrep.dev/docs/contributing/contributing/)
  • Loading branch information
Andre Kuhlenschmidt authored and cretoxyrhina committed Oct 17, 2023
1 parent 63d6db7 commit 17bb798
Show file tree
Hide file tree
Showing 18 changed files with 207 additions and 75 deletions.
3 changes: 3 additions & 0 deletions changelog.d/gh-8356.added
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added general machinery to support languages with case insensitive identifiers and generalized php to use these case insensitive identifiers.

For example, in php the pattern `MyClass()` will now match calls with different capitalization such as `myclass()` and `Myclass()`.
36 changes: 23 additions & 13 deletions languages/php/generic/php_to_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,11 @@ let ident v = wrap string v
let var v = wrap string v
let qualified_ident v = list ident v

let name_of_qualified_ident xs =
(* Note, Funtions and Classes are case insensitive, variables are case
* sensitive. *)
let name_of_qualified_ident ~case_insensitive xs =
let xs = qualified_ident xs in
H.name_of_ids xs
H.name_of_ids ~case_insensitive xs

let name v = qualified_ident v
let fixOp x = x
Expand Down Expand Up @@ -189,15 +191,15 @@ let rec stmt_aux = function
|> list (fun (v1, v2) ->
let v1 = var v1 and v2 = option expr v2 in
let attrs = [ G.KeywordAttr (G.Static, t) ] in
let ent = G.basic_entity v1 ~attrs in
let ent = G.basic_entity v1 ~case_insensitive:false ~attrs in
let def = { G.vinit = v2; vtype = None } in
G.DefStmt (ent, G.VarDef def) |> G.s)
| Global (t, v1) ->
v1
|> Common.map (fun e ->
match e with
| Id [ id ] ->
let ent = G.basic_entity id in
let ent = G.basic_entity ~case_insensitive:false id in
G.DefStmt (ent, G.UseOuterDecl t) |> G.s
| _ ->
let e = expr e in
Expand Down Expand Up @@ -256,7 +258,7 @@ and expr e : G.expr =
let v1 = wrap string v1 in
G.L (G.String (fb v1)) |> G.e
| Id v1 ->
let v1 = name_of_qualified_ident v1 in
let v1 = name_of_qualified_ident ~case_insensitive:true v1 in
G.N v1 |> G.e
| IdSpecial v1 -> special v1
(* unify Id and Var, finally *)
Expand Down Expand Up @@ -484,7 +486,7 @@ and array_value v = expr v
and hint_type = function
| Hint v1 ->
let v1 = name v1 in
G.TyN (name_of_qualified_ident v1) |> G.t
G.TyN (name_of_qualified_ident ~case_insensitive:true v1) |> G.t
| HintArray t -> G.ty_builtin ("array", t)
| HintQuestion (t, v1) ->
let v1 = hint_type v1 in
Expand Down Expand Up @@ -539,7 +541,9 @@ and func_def
in
let attrs = list attribute f_attrs in
let body = stmt f_body in
let ent = G.basic_entity id ~attrs:(modifiers @ attrs) in
let ent =
G.basic_entity id ~attrs:(modifiers @ attrs) ~case_insensitive:true
in
let def =
{ G.fparams = fb params; frettype = fret; fbody = G.FBStmt body; fkind }
in
Expand Down Expand Up @@ -582,13 +586,15 @@ and parameter_classic { p_type; p_ref; p_name; p_default; p_attrs; p_variadic }

and modifier v = wrap modifierbis v

(* TODO: attributes are probably case-insensitive because they refer
to class names. This need to be verified. *)
and attribute v =
match v with
| Id xs ->
let name = name_of_qualified_ident xs in
let name = name_of_qualified_ident ~case_insensitive:true xs in
G.NamedAttr (fake "@", name, fb [])
| Call (Id xs, args) ->
let name = name_of_qualified_ident xs in
let name = name_of_qualified_ident ~case_insensitive:true xs in
let args = bracket (list argument) args in
G.NamedAttr (fake "@", name, args)
| _ -> raise Impossible
Expand All @@ -598,7 +604,7 @@ and constant_def { cst_name; cst_body; cst_tok = tok } =
let id = ident cst_name in
let body = expr cst_body in
let attr = [ G.KeywordAttr (G.Const, tok) ] in
let ent = G.basic_entity id ~attrs:attr in
let ent = G.basic_entity id ~case_insensitive:false ~attrs:attr in
(ent, { G.vinit = Some body; vtype = None })

and enum_type _tok { e_base; e_constraint } =
Expand Down Expand Up @@ -646,7 +652,11 @@ and class_def
@ (methods |> Common.map (fun (ent, var) -> (ent, G.FuncDef var)))
in

let ent = G.basic_entity id ~attrs:(attrs @ modifiers @ class_attrs) in
let ent =
G.basic_entity id
~attrs:(attrs @ modifiers @ class_attrs)
~case_insensitive:true
in
let def =
{
G.ckind = kind;
Expand Down Expand Up @@ -683,7 +693,7 @@ and class_var
let modifiers =
list modifier cmodifiers |> Common.map (fun m -> G.KeywordAttr m)
in
let ent = G.basic_entity id ~attrs:modifiers in
let ent = G.basic_entity id ~case_insensitive:false ~attrs:modifiers in
let def = { G.vtype = typ; vinit = value } in
(ent, def)

Expand All @@ -692,7 +702,7 @@ and method_def v = func_def v
and type_def { t_name; t_kind } =
let id = ident t_name in
let kind = type_def_kind (snd t_name) t_kind in
let ent = G.basic_entity id in
let ent = G.basic_entity ~case_insensitive:true id in
(ent, { G.tbody = kind })

and type_def_kind _tok = function
Expand Down
14 changes: 8 additions & 6 deletions libs/ast_generic/AST_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2139,21 +2139,23 @@ let p x = x
let id_info_id = IdInfoId.mk
let empty_var = { vinit = None; vtype = None }

let empty_id_info ?(hidden = false) () =
let empty_id_info ?(hidden = false) ?(case_insensitive = false)
?(id = id_info_id ()) () =
{
id_resolved = ref None;
id_type = ref None;
id_svalue = ref None;
id_flags =
ref (if hidden then IdFlags.set_hidden IdFlags.empty else IdFlags.empty);
id_info_id = id_info_id ();
id_flags = ref (IdFlags.make ~hidden ~case_insensitive);
id_info_id = id;
}

let basic_id_info ?(hidden = false) resolved =
let id_info = empty_id_info ~hidden () in
id_info.id_resolved := Some resolved;
id_info

let is_case_insensitive info = IdFlags.is_case_insensitive !(info.id_flags)

(* TODO: move AST_generic_helpers.name_of_id and ids here *)

let dotted_to_canonical xs = Common.map fst xs
Expand All @@ -2164,8 +2166,8 @@ let canonical_to_dotted tid xs = xs |> Common.map (fun s -> (s, tid))
(* ------------------------------------------------------------------------- *)

(* alt: could use @@deriving make *)
let basic_entity ?hidden ?(attrs = []) ?(tparams = []) id =
let idinfo = empty_id_info ?hidden () in
let basic_entity ?hidden ?case_insensitive ?(attrs = []) ?(tparams = []) id =
let idinfo = empty_id_info ?hidden ?case_insensitive () in
{ name = EN (Id (id, idinfo)); attrs; tparams }

(* ------------------------------------------------------------------------- *)
Expand Down
7 changes: 4 additions & 3 deletions libs/ast_generic/AST_generic_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ let add_type_args_opt_to_name name topt =
| None -> name
| Some t -> add_type_args_to_name name t

let name_of_ids xs =
let name_of_ids ?(case_insensitive = false) xs =
match List.rev xs with
| [] -> failwith "name_of_ids: empty ids"
| [ x ] -> Id (x, empty_id_info ())
| [ x ] -> Id (x, empty_id_info ~case_insensitive ())
| x :: xs ->
let qualif =
if xs =*= [] then None
Expand Down Expand Up @@ -136,7 +136,8 @@ let add_suffix_to_name suffix name =
name_middle = new_name_middle;
}

let name_of_id id = Id (id, empty_id_info ())
let name_of_id ?(case_insensitive = false) id =
Id (id, empty_id_info ~case_insensitive ())

let name_of_dot_access e =
let rec fetch_ids = function
Expand Down
6 changes: 4 additions & 2 deletions libs/ast_generic/AST_generic_helpers.mli
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@ val funcbody_to_stmt : AST_generic.function_body -> AST_generic.stmt

(* name building *)

val name_of_id : AST_generic.ident -> AST_generic.name
val name_of_ids : AST_generic.dotted_ident -> AST_generic.name
val name_of_id : ?case_insensitive:bool -> AST_generic.ident -> AST_generic.name

val name_of_ids :
?case_insensitive:bool -> AST_generic.dotted_ident -> AST_generic.name

val name_of_ids_with_opt_typeargs :
(AST_generic.ident * AST_generic.type_arguments option) list ->
Expand Down
9 changes: 9 additions & 0 deletions libs/ast_generic/IdFlags.ml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,19 @@
open Ppx_hash_lib.Std.Hash.Builtin

let bitmask_HIDDEN = 0x01
let bitmask_CASE_INSENSITIVE = 0x02

type t = int [@@deriving show, eq, ord, hash]

let empty = 0
let is_hidden flags = Int.logand flags bitmask_HIDDEN <> 0
let set_hidden flags = Int.logor flags bitmask_HIDDEN
let is_case_insensitive flags = Int.logand flags bitmask_CASE_INSENSITIVE <> 0
let set_case_insensitive flags = Int.logor flags bitmask_CASE_INSENSITIVE
let to_int x = x

let make ~hidden ~case_insensitive =
let flags = empty in
let flags = if hidden then set_hidden flags else flags in
let flags = if case_insensitive then set_case_insensitive flags else flags in
flags
12 changes: 12 additions & 0 deletions libs/ast_generic/IdFlags.mli
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ type t [@@deriving show, eq, ord, hash]
val empty : t
(** No flags set *)

val make : hidden:bool -> case_insensitive:bool -> t

val is_hidden : t -> bool
(**
Flag 'hidden' must be set for any artificial identifier that never
Expand All @@ -27,4 +29,14 @@ val is_hidden : t -> bool
*)

val set_hidden : t -> t

val is_case_insensitive : t -> bool
(**
The case_insensitive flag is indicates that equality of ASTs
should not care about the case of the characters being used to
determine if two idents are equal. This is useful for languages
like php and apex.
*)

val set_case_insensitive : t -> t
val to_int : t -> int
12 changes: 1 addition & 11 deletions src/experiments/synthesizing/Pattern_from_Code.ml
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,7 @@ let body_ellipsis t1 t2 = Block (t1, [ fk_stmt ], t2) |> G.s
let _bk f (lp, x, rp) = (lp, f x, rp)

let default_id str =
G.N
(Id
( (str, fk),
{
id_resolved = ref None;
id_type = ref None;
id_svalue = ref None;
id_flags = ref IdFlags.empty;
id_info_id = IdInfoId.unsafe_default;
} ))
|> G.e
G.N (Id ((str, fk), empty_id_info ~id:IdInfoId.unsafe_default ())) |> G.e

let default_tyvar str typ = TypedMetavar ((str, fk), fk, typ) |> G.e

Expand Down
12 changes: 1 addition & 11 deletions src/experiments/synthesizing/Pattern_from_Targets.ml
Original file line number Diff line number Diff line change
Expand Up @@ -124,17 +124,7 @@ let _body_ellipsis t1 t2 = Block (t1, [ fk_stmt ], t2) |> G.s
let _bk f (lp, x, rp) = (lp, f x, rp)

let default_id str =
N
(Id
( (str, fk),
{
id_resolved = ref None;
id_type = ref None;
id_svalue = ref None;
id_flags = ref IdFlags.empty;
id_info_id = IdInfoId.unsafe_default;
} ))
|> G.e
N (Id ((str, fk), empty_id_info ~id:IdInfoId.unsafe_default ())) |> G.e

let count_to_id count =
let make_id ch = Format.sprintf "$%c" ch in
Expand Down
16 changes: 14 additions & 2 deletions src/matching/Generic_vs_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,11 @@ let should_match_call = function
| G.IncrDecr _ ->
false

let m_id_string case_insensitive =
if case_insensitive then fun a b ->
m_string (String.lowercase_ascii a) (String.lowercase_ascii b)
else m_string

(*****************************************************************************)
(* Name *)
(*****************************************************************************)
Expand All @@ -186,8 +191,12 @@ let m_ident a b =
| (stra, _), (strb, _) when Pattern.is_regexp_string stra ->
let re_match = Matching_generic.regexp_matcher_of_regexp_string stra in
if re_match strb then return () else fail ()
(* Note: We should try to avoid allowing case insensitive
* identifiers to get here because we have no way of
* distinguishing them from case sensitive identifiers
*)
(* general case *)
| a, b -> (m_wrap m_string) a b
| a, b -> m_wrap m_string a b

(* see also m_dotted_name_prefix_ok *)
let m_dotted_name a b =
Expand Down Expand Up @@ -570,7 +579,10 @@ and m_ident_and_id_info (a1, a2) (b1, b2) =
if re_match strb then return () else fail ()
(* general case *)
| _, _ ->
let* () = m_wrap m_string a1 b1 in
let case_insensitive =
G.is_case_insensitive a2 && B.is_case_insensitive b2
in
let* () = m_wrap (m_id_string case_insensitive) a1 b1 in
m_id_info a2 b2

and m_ident_and_empty_id_info a1 b1 =
Expand Down
Loading

0 comments on commit 17bb798

Please sign in to comment.