Skip to content

Commit

Permalink
PHP: Add api tagger + include packages in probablesinks
Browse files Browse the repository at this point in the history
  • Loading branch information
Dattaprasad Mundada authored and Dattaprasad Mundada committed Mar 26, 2024
1 parent 0210435 commit 22ecd13
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 0 deletions.
22 changes: 22 additions & 0 deletions src/main/scala/ai/privado/exporter/ProbableSinkExporter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,17 @@ class ProbableSinkExporter(cpg: Cpg, ruleCache: RuleCache, repoPath: String, rep
val isJavascript = lang.toString().contains(Language.JAVASCRIPT.toString)
val isRuby = lang.toString().contains(Language.RUBY.toString)
val isGoLang = lang.toString().contains(Language.GO.toString)
val isPHP = lang.toString().contains(Language.PHP.toString)

if (repoItemTagName.isDefined)
List() // If this is an export for Monolith repoItem, don't export Probable sink, otherwise this will make the Json very big and will need separate processing on backend
else if (isJavascript) {
getProbableSinkForJavascript(repoPath)
} else if (isRuby) {
getProbableSinkForRuby(repoPath)
} else if (isPHP){
val composerDep = getProbableSinkForPHP(repoPath)
composerDep ++ getProbableSinkBasedOnTaggedMethods(isPython, isGoLang)
} else {
getProbableSinkBasedOnTaggedMethods(isPython, isGoLang)
}
Expand All @@ -50,6 +54,24 @@ class ProbableSinkExporter(cpg: Cpg, ruleCache: RuleCache, repoPath: String, rep
.filter((str) => isPrivacySink(str, ruleCache))
}

/** Collects the Composer packages declared in the repository that qualify as privacy sinks.
  *
  * Every file under `repoPath` whose path ends with `composer.json` is parsed, and the package
  * names (keys) of its `require` object are gathered and de-duplicated across files. A file that
  * is not valid JSON, or whose `require` entry is missing or not a string-to-string object,
  * contributes no dependencies.
  *
  * @param repoPath
  *   root directory that is searched recursively for `composer.json` files
  * @return
  *   the distinct package names accepted by `isPrivacySink`
  */
def getProbableSinkForPHP(repoPath: String): List[String] = {
  val composerJsonPaths =
    getAllFilesRecursively(repoPath, Set(".json"), ruleCache)
      .getOrElse(List.empty)
      .filter(_.endsWith("composer.json"))

  // Fold into an immutable set instead of mutating a `var` from inside a loop.
  val uniqueDeps = composerJsonPaths.foldLeft(Set.empty[String]) { (deps, path) =>
    // Using.resource closes the underlying file handle even if reading throws;
    // a bare `Source.fromFile(path).mkString` leaves the handle open.
    val composerJsonStr = scala.util.Using.resource(scala.io.Source.fromFile(path))(_.mkString)
    val json            = parse(composerJsonStr).getOrElse(Json.Null)
    val dependencies    = json.hcursor.downField("require").as[Map[String, String]].getOrElse(Map.empty)
    deps ++ dependencies.keySet
  }

  uniqueDeps.toList
    .filter(str => isPrivacySink(str, ruleCache))
}

def getProbableSinkForRuby(repoPath: String): List[String] = {
// Set up a set to hold the unique dependencies
val gemFilePaths =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ package ai.privado.languageEngine.php.tagger
import ai.privado.cache.{DataFlowCache, RuleCache, TaggerCache}
import ai.privado.entrypoint.PrivadoInput
import ai.privado.languageEngine.php.tagger.source.IdentifierTagger
import ai.privado.languageEngine.php.tagger.sink.APITagger
import ai.privado.tagger.PrivadoBaseTagger
import ai.privado.tagger.sink.RegularSinkTagger
import ai.privado.tagger.source.LiteralTagger
Expand All @@ -49,6 +50,7 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger {
new LiteralTagger(cpg, rules).createAndApply()
new IdentifierTagger(cpg, rules, taggerCache).createAndApply()
new RegularSinkTagger(cpg, rules).createAndApply()
new APITagger(cpg, rules, privadoInput = privadoInputConfig).createAndApply()

logger.info("Finished tagging")
cpg.tag
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package ai.privado.languageEngine.php.tagger.sink

import ai.privado.cache.RuleCache
import ai.privado.entrypoint.{PrivadoInput, ScanProcessor}
import ai.privado.languageEngine.java.language.{NodeStarters, StepsForProperty}
import ai.privado.languageEngine.java.semantic.JavaSemanticGenerator
import ai.privado.metric.MetricHandler
import ai.privado.model.{Constants, NodeType, RuleInfo}
import ai.privado.tagger.PrivadoParallelCpgPass
import ai.privado.tagger.utility.APITaggerUtility.sinkTagger
import ai.privado.utility.Utilities
import io.circe.Json
import io.joern.dataflowengineoss.queryengine.{EngineConfig, EngineContext}
import io.shiftleft.codepropertygraph.generated.nodes.Call
import io.shiftleft.codepropertygraph.generated.{Cpg, Operators}
import io.shiftleft.semanticcpg.language.*
import org.slf4j.LoggerFactory

import scala.jdk.CollectionConverters.CollectionHasAsScala
import java.util.Calendar

/** Parallel CPG pass that tags API sinks for the PHP language engine.
  *
  * On construction it pre-computes the candidate HTTP API calls (`httpApis`) from the CPG. The pass
  * then runs once per sink rule whose node type is `NodeType.API`: it collects the literals,
  * properties and identifiers matching that rule and delegates the tagging to `sinkTagger`.
  *
  * @param cpg
  *   code property graph to analyse
  * @param ruleCache
  *   source of the sink rules and of the system-config regexes used below
  * @param privadoInput
  *   scan input; used to build the engine context and forwarded to `sinkTagger`
  */
class APITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput)
extends PrivadoParallelCpgPass[RuleInfo](cpg) {
private val logger = LoggerFactory.getLogger(this.getClass)
// Every call in the CPG whose name is not one of the operator names in Operators.ALL.
val cacheCall: List[Call] = cpg.call.where(_.nameNot(Operators.ALL.asScala.toSeq: _*)).l
// Subset of the calls above that are named `__construct`.
val constructNameCall: List[Call] = cacheCall.where(_.name("__construct")).l

// Regex describing API sink names, read from the system config (key Constants.apiSinks).
val APISINKS_REGEX: String = ruleCache.getSystemConfigByKey(Constants.apiSinks)

// Calls whose name matches the API sink regex, case-insensitively.
val apis: List[Call] = cacheCall.name("(?i)" + APISINKS_REGEX).l
// `__construct` calls whose full name is `<anything><api sink regex>->__construct`, case-insensitively.
val constructApis: List[Call] = constructNameCall.where(_.methodFullName("(?i).*" + APISINKS_REGEX + "(->)__construct")).l

// Side effect at construction time: records which API tagger variant is in use in the metrics data.
MetricHandler.metricsData("apiTaggerVersion") = Json.fromString("Common HTTP Libraries Used")
// NOTE(review): the meaning of the second argument (4) is defined by Utilities.getEngineContext and is
// not visible here; the implicit is presumably consumed by sinkTagger — confirm.
implicit val engineContext: EngineContext = Utilities.getEngineContext(privadoInput, 4)
// Regex of common HTTP library packages, read from the system config (key Constants.apiHttpLibraries).
val commonHttpPackages: String = ruleCache.getSystemConfigByKey(Constants.apiHttpLibraries)

// Keep only the candidate calls that belong to a common HTTP library: either the method full name
// matches `commonHttpPackages`, or at least one dynamic type hint of the call matches it.
val httpApis: List[Call] = (apis ++ constructApis)
.or(_.methodFullName(commonHttpPackages), _.filter(_.dynamicTypeHintFullName.exists(_.matches(commonHttpPackages))))
.l

// Regex for identifier names that are treated as API sources, read from the system config
// (key Constants.apiIdentifier).
val identifierRegex: String = ruleCache.getSystemConfigByKey(Constants.apiIdentifier)

/** Produces one work item per sink rule whose node type is `NodeType.API`. */
override def generateParts(): Array[_ <: AnyRef] = {
ruleCache.getRule.sinks
.filter(rule => rule.nodeType.equals(NodeType.API))
.toArray
}

/** Tags the API sinks for a single rule.
  *
  * @param builder
  *   diff graph builder handed on to `sinkTagger`
  * @param ruleInfo
  *   the API sink rule being processed
  */
override def runOnPart(builder: DiffGraphBuilder, ruleInfo: RuleInfo): Unit = {
// Literals whose code is the rule pattern, optionally wrapped in a single or double quote.
val apiInternalSources = cpg.literal.code("(?:\"|'){0,1}(" + ruleInfo.combinedRulePattern + ")(?:\"|'){0,1}").l
// Usage sites of property nodes whose value matches the rule pattern.
val propertySources = cpg.property.filter(p => p.value matches (ruleInfo.combinedRulePattern)).usedAt.l

// Identifier-based sources (identifiers, and usage sites of properties, whose name matches
// `identifierRegex`) are added for every rule except the internal API rule.
val identifierSource = {
if (!ruleInfo.id.equals(Constants.internalAPIRuleId))
cpg.identifier(identifierRegex).l ++ cpg.property.filter(p => p.name matches (identifierRegex)).usedAt.l
else
List()
}

logger.debug("Using Enhanced API tagger to find API sinks")
// Hand the collected sources and the de-duplicated HTTP API calls to the shared tagging utility.
sinkTagger(
apiInternalSources ++ propertySources ++ identifierSource,
(httpApis).distinct,
builder,
ruleInfo,
ruleCache,
privadoInput
)
}
}

0 comments on commit 22ecd13

Please sign in to comment.