From 22ecd13196a204ba973f03fcca5d21fa7f10fa24 Mon Sep 17 00:00:00 2001
From: Dattaprasad Mundada
Date: Tue, 26 Mar 2024 11:53:07 +0530
Subject: [PATCH] PHP: Add api tagger + include packages in probablesinks

---
 .../exporter/ProbableSinkExporter.scala       | 38 ++++++++++
 .../php/tagger/PrivadoTagger.scala            |  2 +
 .../php/tagger/sink/APITagger.scala           | 71 +++++++++++++++++++
 3 files changed, 111 insertions(+)
 create mode 100644 src/main/scala/ai/privado/languageEngine/php/tagger/sink/APITagger.scala

diff --git a/src/main/scala/ai/privado/exporter/ProbableSinkExporter.scala b/src/main/scala/ai/privado/exporter/ProbableSinkExporter.scala
index 7c16b87ef..c28058b59 100644
--- a/src/main/scala/ai/privado/exporter/ProbableSinkExporter.scala
+++ b/src/main/scala/ai/privado/exporter/ProbableSinkExporter.scala
@@ -20,6 +20,7 @@ class ProbableSinkExporter(cpg: Cpg, ruleCache: RuleCache, repoPath: String, rep
     val isJavascript = lang.toString().contains(Language.JAVASCRIPT.toString)
     val isRuby       = lang.toString().contains(Language.RUBY.toString)
     val isGoLang     = lang.toString().contains(Language.GO.toString)
+    val isPHP        = lang.toString().contains(Language.PHP.toString)
 
     if (repoItemTagName.isDefined)
       List() // If this is an export for Monolith repoItem, don't export Probable sink, otherwise this will make the Json very big and will need separate processing on backend
@@ -27,6 +28,9 @@ class ProbableSinkExporter(cpg: Cpg, ruleCache: RuleCache, repoPath: String, rep
       getProbableSinkForJavascript(repoPath)
     } else if (isRuby) {
       getProbableSinkForRuby(repoPath)
+    } else if (isPHP) {
+      val composerDep = getProbableSinkForPHP(repoPath)
+      composerDep ++ getProbableSinkBasedOnTaggedMethods(isPython, isGoLang)
     } else {
       getProbableSinkBasedOnTaggedMethods(isPython, isGoLang)
     }
@@ -50,6 +54,40 @@ class ProbableSinkExporter(cpg: Cpg, ruleCache: RuleCache, repoPath: String, rep
       .filter((str) => isPrivacySink(str, ruleCache))
   }
 
+  /** Lists the Composer packages declared by the repository that match a privacy sink rule.
+    *
+    * Every `composer.json` found under `repoPath` is parsed and the keys of its `require` object are collected. A
+    * manifest that fails to parse, or whose `require` entry is missing or not a string-to-string object,
+    * contributes no packages.
+    *
+    * @param repoPath
+    *   root directory that is searched recursively for `composer.json` files
+    * @return
+    *   the distinct package names accepted by `isPrivacySink`
+    */
+  def getProbableSinkForPHP(repoPath: String): List[String] = {
+    val composerFilePaths =
+      getAllFilesRecursively(repoPath, Set(".json"), ruleCache)
+        .getOrElse(List.empty)
+        .filter(_.endsWith("composer.json"))
+
+    composerFilePaths
+      .flatMap { path =>
+        // Using.resource closes the file handle even when reading fails; a bare Source.fromFile leaks it
+        val composerJsonStr = scala.util.Using.resource(scala.io.Source.fromFile(path))(_.mkString)
+        parse(composerJsonStr)
+          .getOrElse(Json.Null)
+          .hcursor
+          .downField("require")
+          .as[Map[String, String]]
+          .getOrElse(Map.empty[String, String])
+          .keySet
+      }
+      .toSet
+      .toList
+      .filter((str) => isPrivacySink(str, ruleCache))
+  }
+
   def getProbableSinkForRuby(repoPath: String): List[String] = {
     // Set up a set to hold the unique dependencies
     val gemFilePaths =
diff --git a/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala
index 2ab1e0507..f0e928bdf 100644
--- a/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala
+++ b/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala
@@ -26,6 +26,7 @@ package ai.privado.languageEngine.php.tagger
 import ai.privado.cache.{DataFlowCache, RuleCache, TaggerCache}
 import ai.privado.entrypoint.PrivadoInput
 import ai.privado.languageEngine.php.tagger.source.IdentifierTagger
+import ai.privado.languageEngine.php.tagger.sink.APITagger
 import ai.privado.tagger.PrivadoBaseTagger
 import ai.privado.tagger.sink.RegularSinkTagger
 import ai.privado.tagger.source.LiteralTagger
@@ -49,6 +50,7 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger {
     new LiteralTagger(cpg, rules).createAndApply()
     new IdentifierTagger(cpg, rules, taggerCache).createAndApply()
     new RegularSinkTagger(cpg, rules).createAndApply()
+    new APITagger(cpg, rules, privadoInput = privadoInputConfig).createAndApply()
 
     logger.info("Finished tagging")
     cpg.tag
diff --git a/src/main/scala/ai/privado/languageEngine/php/tagger/sink/APITagger.scala
b/src/main/scala/ai/privado/languageEngine/php/tagger/sink/APITagger.scala
new file mode 100644
index 000000000..c4a5a6a98
--- /dev/null
+++ b/src/main/scala/ai/privado/languageEngine/php/tagger/sink/APITagger.scala
@@ -0,0 +1,71 @@
+package ai.privado.languageEngine.php.tagger.sink
+
+import ai.privado.cache.RuleCache
+import ai.privado.entrypoint.{PrivadoInput, ScanProcessor}
+import ai.privado.languageEngine.java.language.{NodeStarters, StepsForProperty}
+import ai.privado.languageEngine.java.semantic.JavaSemanticGenerator
+import ai.privado.metric.MetricHandler
+import ai.privado.model.{Constants, NodeType, RuleInfo}
+import ai.privado.tagger.PrivadoParallelCpgPass
+import ai.privado.tagger.utility.APITaggerUtility.sinkTagger
+import ai.privado.utility.Utilities
+import io.circe.Json
+import io.joern.dataflowengineoss.queryengine.{EngineConfig, EngineContext}
+import io.shiftleft.codepropertygraph.generated.nodes.Call
+import io.shiftleft.codepropertygraph.generated.{Cpg, Operators}
+import io.shiftleft.semanticcpg.language.*
+import org.slf4j.LoggerFactory
+
+import scala.jdk.CollectionConverters.CollectionHasAsScala
+import java.util.Calendar
+
+/** Calls `sinkTagger` for each API sink rule, using the HTTP-library calls selected at construction time. */
+class APITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput)
+    extends PrivadoParallelCpgPass[RuleInfo](cpg) {
+  private val logger = LoggerFactory.getLogger(this.getClass)
+
+  // Every call in the graph whose name is not one of the built-in operators
+  val cacheCall: List[Call]         = cpg.call.nameNot(Operators.ALL.asScala.toSeq: _*).l
+  val constructNameCall: List[Call] = cacheCall.name("__construct").l
+
+  val APISINKS_REGEX: String = ruleCache.getSystemConfigByKey(Constants.apiSinks)
+
+  // Candidates: calls named like an API sink, plus `__construct` calls whose full name contains such a name
+  val apis: List[Call]          = cacheCall.name("(?i)" + APISINKS_REGEX).l
+  val constructApis: List[Call] = constructNameCall.methodFullName("(?i).*" + APISINKS_REGEX + "(->)__construct").l
+
+  MetricHandler.metricsData("apiTaggerVersion") = Json.fromString("Common HTTP Libraries Used")
+  implicit val engineContext: EngineContext = Utilities.getEngineContext(privadoInput, 4)
+  val commonHttpPackages: String            = ruleCache.getSystemConfigByKey(Constants.apiHttpLibraries)
+
+  // Keep a candidate when its method full name or one of its dynamic type hints matches the HTTP library pattern
+  val httpApis: List[Call] = (apis ++ constructApis)
+    .or(
+      calls => calls.methodFullName(commonHttpPackages),
+      calls => calls.filter(call => call.dynamicTypeHintFullName.exists(hint => hint.matches(commonHttpPackages)))
+    )
+    .l
+
+  // Regex for identifier and property names that runOnPart additionally treats as sources
+  val identifierRegex: String = ruleCache.getSystemConfigByKey(Constants.apiIdentifier)
+
+  /** One work item per sink rule whose node type is `NodeType.API`. */
+  override def generateParts(): Array[_ <: AnyRef] =
+    ruleCache.getRule.sinks.filter(_.nodeType.equals(NodeType.API)).toArray
+
+  /** Collects the source nodes matching `ruleInfo` and forwards them, with `httpApis`, to `sinkTagger`. */
+  override def runOnPart(builder: DiffGraphBuilder, ruleInfo: RuleInfo): Unit = {
+    val optionalQuote = "(?:\"|'){0,1}"
+    val rulePattern   = ruleInfo.combinedRulePattern
+    val literals      = cpg.literal.code(optionalQuote + "(" + rulePattern + ")" + optionalQuote).l
+    val properties    = cpg.property.filter(_.value.matches(rulePattern)).usedAt.l
+
+    // The internal API rule is matched on literal and property values only
+    val identifiers =
+      if (ruleInfo.id.equals(Constants.internalAPIRuleId)) List()
+      else cpg.identifier(identifierRegex).l ++ cpg.property.filter(_.name.matches(identifierRegex)).usedAt.l
+
+    logger.debug("Using Enhanced API tagger to find API sinks")
+    sinkTagger(literals ++ properties ++ identifiers, httpApis.distinct, builder, ruleInfo, ruleCache, privadoInput)
+  }
+}