diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml
index d3343317a..813e90ed1 100644
--- a/.github/workflows/code-quality.yml
+++ b/.github/workflows/code-quality.yml
@@ -35,6 +35,6 @@ jobs:
distribution: 'temurin'
java-version: '17'
- name: Run unit test
- run: sbt test test:test
+ run: sbt -J-Xmx4G test test:test
- run: echo "Previous step failed because unit test failed."
- if: ${{ failure() }}
\ No newline at end of file
+ if: ${{ failure() }}
diff --git a/Dockerfile b/Dockerfile
index ed9d22cf9..1ab0ec1d8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
ARG VERSION=1.0.0
FROM openjdk:18.0.2.1-jdk-bullseye as build
-RUN apt update && apt install -y python3 git curl bash ruby-full
+RUN apt update && apt install -y python3 git curl bash ruby-full php
RUN ln -sf python3 /usr/bin/python
ENV SBT_VERSION 1.7.1
ENV SBT_HOME /usr/local/sbt
diff --git a/README.md b/README.md
index 59829fb67..6b80aed44 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
Privado Core
=============================================
-Branch structure
+Branch structure
main - This branch will contain the released version of the code.
diff --git a/build.sbt b/build.sbt
index 021254cc3..14fb59ea3 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,4 +1,5 @@
import sbt.Credentials
+import better.files.File
name := "privado-core"
ThisBuild / organization := "ai.privado"
@@ -206,12 +207,42 @@ stage := Def
Compile / compile := ((Compile / compile) dependsOn dotnetAstGenDlTask).value
-// Also remove astgen binaries with clean, e.g., to allow for updating them.
+// Download php-parser: start
+// This is based on how php2cpg vendors the php-parser in Joern
+val phpParserVersion = "4.15.7"
+val upstreamParserBinName = "php-parser.phar"
+val versionedParserBinName = s"php-parser-$phpParserVersion.phar"
+val phpParserDlUrl =
+ s"https://github.com/joernio/PHP-Parser/releases/download/v$phpParserVersion/$upstreamParserBinName"
+
+Compile / compile := ((Compile / compile) dependsOn phpParseDlTask).value
+
+lazy val phpParseDlTask = taskKey[Unit]("Download php-parser binaries")
+phpParseDlTask := {
+  val phpBinDir = baseDirectory.value / "bin" / "php-parser"
+  phpBinDir.mkdirs()
+  // Obtain the versioned phar via SimpleCache.downloadMaybe and copy it into bin/php-parser.
+  val downloadedFile = SimpleCache.downloadMaybe(phpParserDlUrl)
+  IO.copyFile(downloadedFile, phpBinDir / versionedParserBinName)
+  // php-parser.php is the stable entry point: it must delegate to the versioned phar (an empty file makes it a no-op).
+  File((phpBinDir / "php-parser.php").getPath)
+    .createFileIfNotExists()
+    .overwrite(s"<?php\nrequire('$versionedParserBinName');?>")
+  // Mirror the whole directory into the staged distribution as well.
+  val distDir = (Universal / stagingDirectory).value / "bin" / "php-parser"
+  distDir.mkdirs()
+  IO.copyDirectory(phpBinDir, distDir)
+}
+// Download php-parser: end
+
+// Also remove astgen and php-parser binaries with clean, e.g., to allow for updating them.
// Sadly, we can't define the bin/ folders globally,
// as .value can only be used within a task or setting macro
cleanFiles ++= Seq(
baseDirectory.value / "bin" / "astgen",
- (Universal / stagingDirectory).value / "bin" / "astgen"
+ (Universal / stagingDirectory).value / "bin" / "astgen",
+ baseDirectory.value / "bin" / "php-parser",
+ (Universal / stagingDirectory).value / "bin" / "php-parser"
) ++ astGenBinaryNames.map(fileName => SimpleCache.encodeFile(s"$astGenDlUrl$fileName"))
Compile / doc / sources := Seq.empty
Compile / packageDoc / publishArtifact := false
diff --git a/src/main/scala/ai/privado/entrypoint/CommandParser.scala b/src/main/scala/ai/privado/entrypoint/CommandParser.scala
index e1a6877f7..f29cc6022 100644
--- a/src/main/scala/ai/privado/entrypoint/CommandParser.scala
+++ b/src/main/scala/ai/privado/entrypoint/CommandParser.scala
@@ -43,6 +43,7 @@ case class PrivadoInput(
disableReadDataflow: Boolean = false,
enableAPIDisplay: Boolean = false,
enableLambdaFlows: Boolean = false,
+ enableAPIByParameter: Boolean = false,
ignoreExcludeRules: Boolean = false,
ignoreSinkSkipRules: Boolean = false,
skipUpload: Boolean = false,
@@ -85,6 +86,8 @@ object CommandConstants {
val ENABLE_API_DISPLAY_ABBR = "ead"
val ENABLE_LAMBDA_FLOWS = "enable-lambda-flows"
val ENABLE_LAMBDA_FLOWS_ABBR = "elf"
+ val ENABLE_API_BY_PARAMETER = "enable-api-by-parameter"
+ val ENABLE_API_BY_PARAMETER_ABBR = "eabyp"
val IGNORE_EXCLUDE_RULES = "ignore-exclude-rules"
val IGNORE_EXCLUDE_RULES_ABBR = "ier"
val UPLOAD = "upload"
@@ -211,6 +214,11 @@ object CommandParser {
.optional()
.action((_, c) => c.copy(enableLambdaFlows = true))
.text("Enable lambda flows"),
+ opt[Unit](CommandConstants.ENABLE_API_BY_PARAMETER)
+ .abbr(CommandConstants.ENABLE_API_BY_PARAMETER_ABBR)
+ .optional()
+ .action((_, c) => c.copy(enableAPIByParameter = true))
+ .text("Enable API tagging by parameter name match"),
opt[Unit](CommandConstants.IGNORE_EXCLUDE_RULES)
.abbr(CommandConstants.IGNORE_EXCLUDE_RULES_ABBR)
.optional()
diff --git a/src/main/scala/ai/privado/entrypoint/ScanProcessor.scala b/src/main/scala/ai/privado/entrypoint/ScanProcessor.scala
index 6c0d414e5..8740d03a0 100644
--- a/src/main/scala/ai/privado/entrypoint/ScanProcessor.scala
+++ b/src/main/scala/ai/privado/entrypoint/ScanProcessor.scala
@@ -31,6 +31,7 @@ import ai.privado.languageEngine.ruby.processor.RubyProcessor
import ai.privado.languageEngine.default.processor.DefaultProcessor
import ai.privado.languageEngine.kotlin.processor.KotlinProcessor
import ai.privado.languageEngine.go.processor.GoProcessor
+import ai.privado.languageEngine.php.processor.PhpProcessor
import ai.privado.metric.MetricHandler
import ai.privado.model.Language.Language
import ai.privado.model.*
@@ -430,6 +431,18 @@ object ScanProcessor extends CommandProcessor {
auditCache,
s3DatabaseDetailsCache
).processCpg()
+ case language if language == Languages.PHP =>
+ println(s"${Calendar.getInstance().getTime} - Detected language 'PHP'")
+ new PhpProcessor(
+ getProcessedRule(Set(Language.PHP)),
+ this.config,
+ sourceRepoLocation,
+ Language.PHP,
+ dataFlowCache = getDataflowCache,
+ auditCache,
+ s3DatabaseDetailsCache
+ )
+ .processCpg()
case _ =>
if (checkJavaSourceCodePresent(sourceRepoLocation)) {
println(
diff --git a/src/main/scala/ai/privado/exporter/HttpConnectionMetadataExporter.scala b/src/main/scala/ai/privado/exporter/HttpConnectionMetadataExporter.scala
index 17f0f42d7..a5aaf9546 100644
--- a/src/main/scala/ai/privado/exporter/HttpConnectionMetadataExporter.scala
+++ b/src/main/scala/ai/privado/exporter/HttpConnectionMetadataExporter.scala
@@ -38,6 +38,7 @@ class HttpConnectionMetadataExporter(cpg: Cpg, ruleCache: RuleCache) {
private val FEIGN_CLIENT = "FeignClient"
private val SPRING_ANNOTATION_ID = "Collections.Annotation.Spring"
private val STRING_START_WITH_SLASH = "/.{2,}"
+ private val STRING_CONTAINS_URL = ".*\\.[a-z]{2,5}/[a-z]{2,}.*"
private val STRING_CONTAINS_TWO_SLASH = ".*/.*/.*"
private val SPRING_APPLICATION_BASE_PATH =
"(?i)(server[.]servlet[.]context-path|server[.]servlet[.]contextPath)|(spring[.]application[.]name)"
@@ -45,7 +46,12 @@ class HttpConnectionMetadataExporter(cpg: Cpg, ruleCache: RuleCache) {
private val ALPHABET = "[a-zA-Z]"
private val STRING_WITH_CONSECUTIVE_DOTS_OR_DOT_SLASH_OR_NEWLINE = "(?s).*(\\.\\.|\\./|\n).*"
private val ESCAPE_STRING_SLASHES = "(\\\")"
- private val IMPORT_REGEX_WITH_SLASHES = "(?s)^(?=.*/)(?!.*/$).*"
+ // Regex to eliminate pattern ending with file suffix
+ // Demo: https://regex101.com/r/ojV93D/1
+ private val FILE_SUFFIX_REGEX_PATTERN = ".*[.][a-z]{2,5}(\\\")?$"
+ private val SUFFIX_PATTERN = "^(\\.\\/|\\.\\.|\\/\\/).*"
+ private val COMMON_FALSE_POSITIVE_EGRESS_PATTERN =
+ ".*(BEGIN PRIVATE KEY|sha512|googleapis|sha1|amazonaws|github||
|
p.name matches configRule).l
- propertySources.foreach(p => {
- metaData.put(p.name, p.value)
- })
+ cpg.property
+ .filter(p => p.name matches configRule)
+ .l
+ .map(p => {
+ HashMap(
+ Constants.name -> p.name,
+ Constants.value -> p.value,
+ Constants.filePath -> p.file.name.headOption.getOrElse("")
+ )
+ })
} catch {
- case ex: Exception => println("Error while fetching repo config metadata")
+ case ex: Exception => {
+ println("Error while fetching repo config metadata")
+ List()
+ }
}
- metaData
}
}
diff --git a/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaAnnotationPropertyLinkerPass.scala b/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaAnnotationPropertyLinkerPass.scala
index 80fb39897..0325c778a 100644
--- a/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaAnnotationPropertyLinkerPass.scala
+++ b/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaAnnotationPropertyLinkerPass.scala
@@ -10,54 +10,66 @@ class JavaAnnotationPropertyLinkerPass(cpg: Cpg) extends PrivadoParallelCpgPass[
override def generateParts(): Array[_ <: AnyRef] = {
cpg.annotation
- .nameExact("Value")
+ .name(".*(Value|Named).*")
.filter(_.parameterAssign.nonEmpty)
.toArray
}
override def runOnPart(builder: DiffGraphBuilder, annotation: Annotation): Unit = {
- /** List of all parameters annotated with Spring's `Value` annotation, along with the property name.
- */
- if (annotation.parameterAssign.code("\\\"\\$\\{.*\\}\\\"").nonEmpty && annotation.parameter.nonEmpty) {
- val literalName = annotation.parameterAssign.code.head
- val value = Option(literalName.slice(3, literalName.length - 2)).getOrElse("")
+ if (annotation.name == "Value") {
+
+ /** List of all parameters annotated with Spring's `Value` annotation, along with the property name.
+ */
+ if (annotation.parameterAssign.code("\\\"\\$\\{.*\\}\\\"").nonEmpty && annotation.parameter.nonEmpty) {
+ val literalName = annotation.parameterAssign.code.head
+ val value = Option(literalName.slice(3, literalName.length - 2)).getOrElse("")
+ if (value.nonEmpty) {
+ cpg.property
+ .filter(p => p.name == value)
+ .foreach(p => {
+ connectEnvProperty(annotation.parameter.head, p, builder)
+ })
+ }
+ }
+
+ /** List of all members (fields) annotated with Spring's `Value` annotation, along with the property name.
+ */
+ if (annotation.member.nonEmpty) {
+ cpg.property
+ .filter(p =>
+ p.name == Option(
+ annotation.parameterAssign.head.code.slice(3, annotation.parameterAssign.head.code.length - 2)
+ ).getOrElse("")
+ )
+ .foreach(p => {
+ connectEnvProperty(annotation.member.head, p, builder)
+ })
+ }
+
+ /** List of all methods annotated with Spring's `Value` annotation, along with the method node
+ */
+ if (annotation.method.nonEmpty) {
+ val key = annotation.parameterAssign.head
+ cpg.property
+ .filter(p => p.name == Option(key.code.slice(3, key.code.length - 2)).getOrElse(""))
+ .foreach(p => {
+ val referenceMember = annotation.method.head.ast.fieldAccess.referencedMember.l.headOption.orNull
+ if (referenceMember != null) {
+ connectEnvProperty(referenceMember, p, builder)
+ }
+ })
+ }
+ } else if (annotation.name == "Named" && annotation.parameter.nonEmpty) {
+ val value = annotation.parameterAssign.code.head.split("[.]").lastOption.getOrElse("")
if (value.nonEmpty) {
cpg.property
- .filter(p => p.name == value)
+ .filter(p => p.name.endsWith(value))
.foreach(p => {
connectEnvProperty(annotation.parameter.head, p, builder)
})
}
}
-
- /** List of all parameters annotated with Spring's `Value` annotation, along with the property name.
- */
- if (annotation.member.nonEmpty) {
- cpg.property
- .filter(p =>
- p.name == Option(
- annotation.parameterAssign.head.code.slice(3, annotation.parameterAssign.head.code.length - 2)
- ).getOrElse("")
- )
- .foreach(p => {
- connectEnvProperty(annotation.member.head, p, builder)
- })
- }
-
- /** List of all methods annotated with Spring's `Value` annotation, along with the method node
- */
- if (annotation.method.nonEmpty) {
- val key = annotation.parameterAssign.head
- cpg.property
- .filter(p => p.name == Option(key.code.slice(3, key.code.length - 2)).getOrElse(""))
- .foreach(p => {
- val referenceMember = annotation.method.head.ast.fieldAccess.referencedMember.l.headOption.orNull
- if (referenceMember != null) {
- connectEnvProperty(referenceMember, p, builder)
- }
- })
- }
}
def connectEnvProperty(literalNode: AstNode, propertyNode: JavaProperty, builder: DiffGraphBuilder): Unit = {
@@ -65,3 +77,15 @@ class JavaAnnotationPropertyLinkerPass(cpg: Cpg) extends PrivadoParallelCpgPass[
builder.addEdge(literalNode, propertyNode, EdgeTypes.ORIGINAL_PROPERTY)
}
}
+
+//private def namedAnnotatedParameters(): List[(MethodParameterIn, String)] = cpg.annotation
+// .nameExact("Named")
+// .filter(_.parameter.nonEmpty)
+// .map { x =>
+// val value = x.parameterAssign.code.next().split("[.]").lastOption.getOrElse("")
+// (x.parameter.next(), value)
+// }
+// .filter { (_, value) =>
+// value.nonEmpty
+// }
+// .toList
diff --git a/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaPropertyLinkerPass.scala b/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaPropertyLinkerPass.scala
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaPropertyLinkerPass.scala
@@ -0,0 +1 @@
+
diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/PrivadoTagger.scala
index 2af17d8c7..f65257cc9 100644
--- a/src/main/scala/ai/privado/languageEngine/java/tagger/PrivadoTagger.scala
+++ b/src/main/scala/ai/privado/languageEngine/java/tagger/PrivadoTagger.scala
@@ -40,6 +40,7 @@ import ai.privado.languageEngine.java.tagger.collection.{
SOAPCollectionTagger
}
import ai.privado.languageEngine.java.tagger.config.JavaDBConfigTagger
+import ai.privado.languageEngine.java.tagger.sink.api.JavaAPISinkTagger
import ai.privado.languageEngine.java.tagger.sink.{InheritMethodTagger, JavaAPITagger, MessagingConsumerCustomTagger}
import ai.privado.languageEngine.java.tagger.source.{IdentifierTagger, InSensitiveCallTagger}
import ai.privado.tagger.PrivadoBaseTagger
@@ -82,6 +83,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger {
new JavaS3Tagger(cpg, s3DatabaseDetailsCache).createAndApply()
+ JavaAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig)
+
new JavaAPITagger(cpg, ruleCache, privadoInputConfig).createAndApply()
// Custom Rule tagging
if (!privadoInputConfig.ignoreInternalRules) {
diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/FeignAPI.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/FeignAPI.scala
index 964f1b334..7fd979c83 100644
--- a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/FeignAPI.scala
+++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/FeignAPI.scala
@@ -201,7 +201,9 @@ class FeignAPI(cpg: Cpg, ruleCache: RuleCache) {
apiCalls.foreach(apiNode => {
val domain = getDomainFromString(apiLiteral)
val newRuleIdToUse = ruleInfo.id + "." + domain
- ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain))
+ ruleCache.setRuleInfo(
+ ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain, isGenerated = true)
+ )
addRuleTags(builder, apiNode, ruleInfo, ruleCache, Some(newRuleIdToUse))
storeForTag(builder, apiNode, ruleCache)(Constants.apiUrl + newRuleIdToUse, apiLiteral)
})
diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/JavaAPITagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/JavaAPITagger.scala
index 23195447c..e31530ee6 100644
--- a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/JavaAPITagger.scala
+++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/JavaAPITagger.scala
@@ -28,7 +28,7 @@ import ai.privado.languageEngine.java.language.*
import ai.privado.languageEngine.java.semantic.JavaSemanticGenerator
import ai.privado.languageEngine.java.tagger.Utility.{GRPCTaggerUtility, SOAPTaggerUtility}
import ai.privado.metric.MetricHandler
-import ai.privado.model.{Constants, Language, NodeType, RuleInfo}
+import ai.privado.model.{Constants, InternalTag, Language, NodeType, RuleInfo}
import ai.privado.tagger.PrivadoParallelCpgPass
import ai.privado.tagger.utility.APITaggerUtility.{SERVICE_URL_REGEX_PATTERN, sinkTagger}
import ai.privado.utility.{ImportUtility, Utilities}
@@ -96,13 +96,17 @@ class JavaAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInputConfig: PrivadoI
APITaggerVersionJava.V2Tagger
}
+ apis = apis.whereNot(_.tag.nameExact(InternalTag.API_URL_MARKED.toString)).l
+
val commonHttpPackages: String = ruleCache.getSystemConfigByKey(Constants.apiHttpLibraries)
- val grpcSinks = GRPCTaggerUtility.getGrpcSinks(cpg)
- val soapSinks = SOAPTaggerUtility.getAPICallNodes(cpg)
+ val grpcSinks = GRPCTaggerUtility.getGrpcSinks(cpg).whereNot(_.tag.nameExact(InternalTag.API_URL_MARKED.toString)).l
+ val soapSinks =
+ SOAPTaggerUtility.getAPICallNodes(cpg).whereNot(_.tag.nameExact(InternalTag.API_URL_MARKED.toString)).l
override def generateParts(): Array[_ <: AnyRef] = {
ruleCache.getAllRuleInfo
.filter(rule => rule.nodeType.equals(NodeType.API))
+ .filterNot(_.isGenerated) // Filter out generated rules, we only need to use the passed rules
.toArray
}
@@ -133,7 +137,12 @@ class JavaAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInputConfig: PrivadoI
)
else
List()
- }
+ }.whereNot(_.tag.nameExact(InternalTag.API_URL_MARKED.toString)).l
+
+ val markedAPISinks = cpg.call
+ .where(_.tag.nameExact(InternalTag.API_SINK_MARKED.toString))
+ .whereNot(_.tag.nameExact(InternalTag.API_URL_MARKED.toString))
+ .l
apiTaggerToUse match {
case APITaggerVersionJava.V1Tagger =>
@@ -150,7 +159,7 @@ class JavaAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInputConfig: PrivadoI
)
sinkTagger(
apiInternalSources ++ propertySources ++ identifierSource ++ serviceSource,
- feignAPISinks ++ grpcSinks ++ soapSinks,
+ feignAPISinks ++ grpcSinks ++ soapSinks ++ markedAPISinks,
builder,
ruleInfo,
ruleCache,
@@ -161,7 +170,7 @@ class JavaAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInputConfig: PrivadoI
println(s"${Calendar.getInstance().getTime} - --API TAGGER V2 invoked...")
sinkTagger(
apiInternalSources ++ propertySources ++ identifierSource ++ serviceSource,
- apis.methodFullName(commonHttpPackages).l ++ feignAPISinks ++ grpcSinks ++ soapSinks,
+ apis.methodFullName(commonHttpPackages).l ++ feignAPISinks ++ grpcSinks ++ soapSinks ++ markedAPISinks,
builder,
ruleInfo,
ruleCache,
@@ -172,7 +181,7 @@ class JavaAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInputConfig: PrivadoI
println(s"${Calendar.getInstance().getTime} - --API TAGGER SKIPPED, applying Feign client API...")
sinkTagger(
apiInternalSources ++ propertySources ++ identifierSource ++ serviceSource,
- feignAPISinks ++ grpcSinks ++ soapSinks,
+ feignAPISinks ++ grpcSinks ++ soapSinks ++ markedAPISinks,
builder,
ruleInfo,
ruleCache,
diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByMethodFullNameTagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByMethodFullNameTagger.scala
new file mode 100644
index 000000000..d89335984
--- /dev/null
+++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByMethodFullNameTagger.scala
@@ -0,0 +1,28 @@
+package ai.privado.languageEngine.java.tagger.sink.api
+
+import ai.privado.cache.RuleCache
+import ai.privado.model.{Constants, InternalTag}
+import ai.privado.tagger.{PrivadoParallelCpgPass, PrivadoSimpleCpgPass}
+import io.shiftleft.codepropertygraph.generated.{Cpg, Operators}
+import io.shiftleft.codepropertygraph.generated.nodes.Call
+import io.shiftleft.semanticcpg.language.*
+import ai.privado.utility.Utilities.{addRuleTags, storeForTag}
+
+import scala.jdk.CollectionConverters.CollectionHasAsScala
+import scala.language.postfixOps
+
+/** Marks calls as API sinks when their methodFullName matches the regex stored under the
+  * `Constants.apiMethodFullNames` system config key.
+  */
+class JavaAPISinkByMethodFullNameTagger(cpg: Cpg, ruleCache: RuleCache) extends PrivadoSimpleCpgPass(cpg) {
+
+  private val apiMethodFullNameRegex = ruleCache.getSystemConfigByKey(Constants.apiMethodFullNames)
+
+  // Candidate calls, collected once at construction: names in `Operators.ALL` and the name "init" are excluded.
+  val cacheCall: List[Call] = {
+    val excludedNames = Operators.ALL.asScala.toSeq.appended("init")
+    cpg.call.or(_.nameNot(excludedNames: _*)).l
+  }
+
+  /** Tags every candidate call matching the configured regex with `InternalTag.API_SINK_MARKED`; an empty regex
+    * disables the pass.
+    */
+  override def run(builder: DiffGraphBuilder): Unit = {
+    if (apiMethodFullNameRegex.nonEmpty) {
+      val matchedCalls = cacheCall.methodFullName(apiMethodFullNameRegex).toArray
+      for (sinkCall <- matchedCalls)
+        storeForTag(builder, sinkCall, ruleCache)(InternalTag.API_SINK_MARKED.toString)
+    }
+  }
+
+}
diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByParameterTagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByParameterTagger.scala
new file mode 100644
index 000000000..340597ea0
--- /dev/null
+++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByParameterTagger.scala
@@ -0,0 +1,64 @@
+package ai.privado.languageEngine.java.tagger.sink.api
+
+import ai.privado.cache.RuleCache
+import ai.privado.model.{InternalTag, RuleInfo}
+import ai.privado.tagger.PrivadoParallelCpgPass
+import io.shiftleft.codepropertygraph.generated.{Cpg, Operators}
+import io.shiftleft.semanticcpg.language.*
+
+import scala.jdk.CollectionConverters.CollectionHasAsScala
+import ai.privado.utility.Utilities.storeForTag
+
+class JavaAPISinkByParameterTagger(cpg: Cpg, ruleCache: RuleCache) extends PrivadoParallelCpgPass[String](cpg) {
+ override def generateParts(): Array[String] = {
+
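+ /* The query below finds methods that have a parameter (at an index other than 0) whose name ends with
+ `url` or `endpoint` (case-insensitive); for each such method, the part of its signature before `(` is
+ taken as the typeFullName of the object returned by the method
+ */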
+
+ val typeFullNameByUrlLikeMatch = cpg.method
+ .where(_.parameter.filter(_.index != 0).name("(?i).*(url|endpoint)"))
+ .signature
+ .map(_.split("\\(").headOption.getOrElse(""))
+ .filter(_.nonEmpty)
+ .l
+
+ /* The query below finds constructors that have a parameter whose name ends with `config`;
+ for each such constructor, get the typeFullName of the object the constructor belongs to
+ */
+ val typeFullNameByConfigLikeMatch = cpg.method
+ .where(_.parameter.filter(_.index != 0).name("(?i).*(config)"))
+ .where(_.name(""))
+ .fullName
+ .map(_.split("[.]
+ val memberName = m.name
+ val fileName = m.file.name.headOption.getOrElse("")
+
+ /* The query below looks for the fieldIdentifier in the given file and returns the calls made on top of it.
+ The 1st inCall returns the field access node and the 2nd returns the actual call
+ */
+
+ val sinkCalls = cpg.fieldAccess.fieldIdentifier
+ .canonicalName(memberName)
+ .where(_.file.nameExact(fileName))
+ .l
+ .inCall
+ .inCall
+ .where(_.nameNot(Operators.ALL.asScala.toSeq.appended(""): _*))
+ .l
+
+ // Mark the nodes as API sink
+ sinkCalls.foreach(storeForTag(builder, _, ruleCache)(InternalTag.API_SINK_MARKED.toString))
+
+ }
+ }
+}
diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkEndpointMapperByNonInitMethod.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkEndpointMapperByNonInitMethod.scala
new file mode 100644
index 000000000..23799bcc7
--- /dev/null
+++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkEndpointMapperByNonInitMethod.scala
@@ -0,0 +1,109 @@
+package ai.privado.languageEngine.java.tagger.sink.api
+
+import ai.privado.cache.RuleCache
+import ai.privado.model.{Constants, InternalTag, NodeType, RuleInfo}
+import ai.privado.tagger.PrivadoParallelCpgPass
+import io.shiftleft.codepropertygraph.generated.Cpg
+import io.shiftleft.semanticcpg.language.*
+import ai.privado.languageEngine.java.language.*
+import ai.privado.tagger.utility.APITaggerUtility.{
+ getLiteralCode,
+ resolveDomainFromSource,
+ tagAPIWithDomainAndUpdateRuleCache
+}
+import ai.privado.utility.Utilities.{addRuleTags, getDomainFromString, storeForTag}
+import io.shiftleft.codepropertygraph.generated.nodes.AstNode
+
+/** Endpoint mapper for calls that earlier passes tagged with `InternalTag.API_SINK_MARKED`.
+  *
+  * One part is produced per owning type of a marked call (the call's methodFullName up to the first `:` or `(`,
+  * minus its last dot-separated segment). For each part, every method whose signature starts with that type name
+  * is inspected and a domain is resolved from the first of these sources that yields a match:
+  *   1. property nodes reachable through `originalProperty` whose value matches any API rule pattern,
+  *   1. parameters whose name matches the `Constants.apiIdentifier` system config regex,
+  *   1. identifiers in the method's AST whose name matches the same regex.
+  *
+  * Every marked call on the type is then tagged with the resolved domain and with `InternalTag.API_URL_MARKED`.
+  */
+class JavaAPISinkEndpointMapperByNonInitMethod(cpg: Cpg, ruleCache: RuleCache)
+    extends PrivadoParallelCpgPass[String](cpg) {
+
+  // Character class separating the qualified method name from the rest of a methodFullName / signature.
+  private val methodFullNameSplitter = "[:(]"
+
+  // Alternation of the combined patterns of all API rules; used to recognise URL-like property values.
+  private val apiMatchingRegex =
+    ruleCache.getAllRuleInfo.filter(_.nodeType == NodeType.API).map(_.combinedRulePattern).mkString("(", "|", ")")
+
+  private val thirdPartyRuleInfo = ruleCache.getRuleInfo(Constants.thirdPartiesAPIRuleId)
+
+  /** Collects the distinct owning type names of all calls tagged `API_SINK_MARKED`.
+    *
+    * @return
+    *   the type names to process, or an empty array when the third-party API rule is not available
+    */
+  override def generateParts(): Array[String] = {
+
+    /* General assumption - there is a function which creates a client, and the usage of the client and binding
+       happens via dependency injection which can vary according to the framework used.
+       If we identify such a function and can point to the usage of a particular endpoint in it,
+       we can say the client uses the following endpoint
+     */
+
+    if (thirdPartyRuleInfo.isDefined) {
+      cpg.call
+        .where(_.tag.nameExact(InternalTag.API_SINK_MARKED.toString))
+        .methodFullName
+        .map(_.split(methodFullNameSplitter).headOption.getOrElse(""))
+        .filter(_.nonEmpty)
+        // Drop the trailing method name to obtain the owning type.
+        .map(_.split("[.]").dropRight(1).mkString("."))
+        // A name without any `.` leaves nothing behind; an empty type would turn the regexes in
+        // runOnPart into match-everything patterns, so such entries are discarded.
+        .filter(_.nonEmpty)
+        .dedup
+        .toArray
+    } else
+      Array[String]()
+  }
+
+  /** Resolves a domain for the given type and tags all marked calls on that type with it.
+    *
+    * @param builder
+    *   diff graph receiving the new tags
+    * @param typeFullName
+    *   owning type name produced by `generateParts`
+    */
+  override def runOnPart(builder: DiffGraphBuilder, typeFullName: String): Unit = {
+    // generateParts yields no parts without the rule, so foreach only replaces the unsafe Option.get.
+    thirdPartyRuleInfo.foreach { ruleInfo =>
+      // The type name is data, not a pattern: quote it so that `.` and `$` are matched literally.
+      val quotedTypeName = java.util.regex.Pattern.quote(typeFullName)
+
+      // Independent of the inspected method, hence evaluated at most once per part (and only when needed).
+      lazy val impactedApiCalls = cpg.call
+        .methodFullName(s"$quotedTypeName.*")
+        .where(_.tag.nameExact(InternalTag.API_SINK_MARKED.toString))
+        .l
+
+      cpg.method.signature(s"$quotedTypeName$methodFullNameSplitter.*").foreach { clientReturningMethod =>
+        val matchingProperties = clientReturningMethod.ast.originalProperty.value(apiMatchingRegex).dedup.l
+
+        if (matchingProperties.nonEmpty) {
+          matchingProperties.foreach { propertyNode =>
+            val domain = getDomainFromString(propertyNode.value)
+            impactedApiCalls.foreach { apiCall =>
+              tagAPIWithDomainAndUpdateRuleCache(builder, ruleInfo, ruleCache, domain, apiCall, propertyNode)
+              storeForTag(builder, apiCall, ruleCache)(InternalTag.API_URL_MARKED.toString)
+            }
+          }
+        } else { // There is no property node available to be used, try with parameter
+          val variableRegex      = ruleCache.getSystemConfigByKey(Constants.apiIdentifier)
+          val matchingParameters = clientReturningMethod.parameter.name(variableRegex).l
+
+          if (matchingParameters.nonEmpty) {
+            matchingParameters.foreach { parameter =>
+              val domain = resolveDomainFromSource(parameter)
+              impactedApiCalls.foreach { apiCall =>
+                tagAPIWithDomainAndUpdateRuleCache(builder, ruleInfo, ruleCache, domain, apiCall, parameter)
+                storeForTag(builder, apiCall, ruleCache)(InternalTag.API_URL_MARKED.toString)
+              }
+            }
+          } else { // There is no matching parameter to be used, try with identifier
+            clientReturningMethod.ast.isIdentifier.name(variableRegex).l.foreach { identifier =>
+              val domain = resolveDomainFromSource(identifier)
+              impactedApiCalls.foreach { apiCall =>
+                tagAPIWithDomainAndUpdateRuleCache(builder, ruleInfo, ruleCache, domain, apiCall, identifier)
+                storeForTag(builder, apiCall, ruleCache)(InternalTag.API_URL_MARKED.toString)
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkTagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkTagger.scala
new file mode 100644
index 000000000..f7536de3f
--- /dev/null
+++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkTagger.scala
@@ -0,0 +1,25 @@
+package ai.privado.languageEngine.java.tagger.sink.api
+
+import ai.privado.cache.RuleCache
+import ai.privado.entrypoint.PrivadoInput
+import ai.privado.tagger.sink.api.APISinkTagger
+import io.shiftleft.codepropertygraph.generated.Cpg
+
+object JavaAPISinkTagger extends APISinkTagger {
+
+  /** Applies the Java API sink taggers one after another.
+    *
+    * @param cpg
+    *   graph the taggers are applied to
+    * @param ruleCache
+    *   rule cache handed to every tagger
+    * @param privadoInput
+    *   scan input; `enableAPIByParameter` switches the parameter-name based tagger on
+    */
+  override def applyTagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput): Unit = {
+
+    // Opt-in tagger, only run when requested on the command line
+    if (privadoInput.enableAPIByParameter) {
+      val byParameterTagger = new JavaAPISinkByParameterTagger(cpg, ruleCache)
+      byParameterTagger.createAndApply()
+    }
+
+    val byMethodFullNameTagger = new JavaAPISinkByMethodFullNameTagger(cpg, ruleCache)
+    byMethodFullNameTagger.createAndApply()
+
+    // The endpoint mapper is invoked after the sink-marking taggers above
+    val endpointMapper = new JavaAPISinkEndpointMapperByNonInitMethod(cpg, ruleCache)
+    endpointMapper.createAndApply()
+  }
+
+}
diff --git a/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala b/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala
index 4a68f5992..84d0f5f4d 100644
--- a/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala
+++ b/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala
@@ -76,7 +76,9 @@ object JavascriptProcessor {
new JsonPropertyParserPass(cpg, s"$sourceRepoLocation/${Constants.generatedConfigFolderName}")
.createAndApply()
new JsConfigPropertyPass(cpg).createAndApply()
- } else new PropertyParserPass(cpg, sourceRepoLocation, ruleCache, Language.JAVASCRIPT).createAndApply()
+ } else
+ new PropertyParserPass(cpg, sourceRepoLocation, ruleCache, Language.JAVASCRIPT, privadoInput)
+ .createAndApply()
new JSEnvPropertyLinkerPass(cpg).createAndApply()
new SQLParser(cpg, sourceRepoLocation, ruleCache).createAndApply()
new DBTParserPass(cpg, sourceRepoLocation, ruleCache).createAndApply()
diff --git a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/GraphqlAPITagger.scala b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/GraphqlAPITagger.scala
index 204d19057..ce9559c9e 100644
--- a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/GraphqlAPITagger.scala
+++ b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/GraphqlAPITagger.scala
@@ -61,7 +61,9 @@ class GraphqlAPITagger(cpg: Cpg, ruleCache: RuleCache) extends PrivadoParallelCp
if (isReadAPI) {
val newRuleIdToUse = ruleInfo.id + Constants.READ_WITH_BRACKETS
- ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + Constants.READ_WITH_BRACKETS))
+ ruleCache.setRuleInfo(
+ ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + Constants.READ_WITH_BRACKETS, isGenerated = true)
+ )
addRuleTags(builder, apiNode, ruleInfo, ruleCache, Some(newRuleIdToUse))
storeForTag(builder, apiNode, ruleCache)(
Constants.apiUrl + newRuleIdToUse,
@@ -70,7 +72,7 @@ class GraphqlAPITagger(cpg: Cpg, ruleCache: RuleCache) extends PrivadoParallelCp
} else if (isWriteAPI) {
val newRuleIdToUse = ruleInfo.id + Constants.WRITE_WITH_BRACKETS
ruleCache.setRuleInfo(
- ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + Constants.WRITE_WITH_BRACKETS)
+ ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + Constants.WRITE_WITH_BRACKETS, isGenerated = true)
)
addRuleTags(builder, apiNode, ruleInfo, ruleCache, Some(newRuleIdToUse))
storeForTag(builder, apiNode, ruleCache)(
diff --git a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPITagger.scala b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPITagger.scala
index 99c50e4dc..b7bfab394 100644
--- a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPITagger.scala
+++ b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPITagger.scala
@@ -103,7 +103,9 @@ class JSAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput)
if (ruleInfo.id.equals(Constants.internalAPIRuleId)) addRuleTags(builder, scriptTag, ruleInfo, ruleCache)
else {
newRuleIdToUse = ruleInfo.id + "." + domain._2
- ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain._2))
+ ruleCache.setRuleInfo(
+ ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain._2, isGenerated = true)
+ )
addRuleTags(builder, scriptTag, ruleInfo, ruleCache, Some(newRuleIdToUse))
}
storeForTag(builder, scriptTag, ruleCache)(Constants.apiUrl + newRuleIdToUse, domain._1)
@@ -113,7 +115,9 @@ class JSAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput)
if (!identifierDomain.equals(Constants.UnknownDomain)) {
if (!ruleInfo.id.equals(Constants.internalAPIRuleId)) {
newRuleIdToUse = ruleInfo.id + "." + identifierDomain
- ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + identifierDomain))
+ ruleCache.setRuleInfo(
+ ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + identifierDomain, isGenerated = true)
+ )
addRuleTags(builder, scriptTag, ruleInfo, ruleCache, Some(newRuleIdToUse))
storeForTag(builder, scriptTag, ruleCache)(Constants.apiUrl + newRuleIdToUse, identifierDomain)
}
@@ -131,7 +135,9 @@ class JSAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput)
if (ruleInfo.id.equals(Constants.internalAPIRuleId)) addRuleTags(builder, externalScriptCall, ruleInfo, ruleCache)
else {
newRuleIdToUse = ruleInfo.id + "." + domain._2
- ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain._2))
+ ruleCache.setRuleInfo(
+ ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain._2, isGenerated = true)
+ )
addRuleTags(builder, externalScriptCall, ruleInfo, ruleCache, Some(newRuleIdToUse))
}
storeForTag(builder, externalScriptCall, ruleCache)(Constants.apiUrl + newRuleIdToUse, domain._1)
@@ -166,7 +172,9 @@ class JSAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput)
if (ruleInfo.id.equals(Constants.internalAPIRuleId)) addRuleTags(builder, callTag, ruleInfo, ruleCache)
else {
newRuleIdToUse = ruleInfo.id + "." + domain._2
- ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain._2))
+ ruleCache.setRuleInfo(
+ ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain._2, isGenerated = true)
+ )
addRuleTags(builder, callTag, ruleInfo, ruleCache, Some(newRuleIdToUse))
}
storeForTag(builder, callTag, ruleCache)(Constants.apiUrl + newRuleIdToUse, domain._1)
@@ -190,7 +198,9 @@ class JSAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput)
// Tagging the node with respective domain
val newRuleIdToUse = ruleInfo.id + "." + domain
- ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain))
+ ruleCache.setRuleInfo(
+ ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain, isGenerated = true)
+ )
addRuleTags(builder, apiNode, ruleInfo, ruleCache, Some(newRuleIdToUse))
storeForTag(builder, apiNode, ruleCache)(Constants.apiUrl + newRuleIdToUse, domain)
diff --git a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/RegularSinkTagger.scala b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/RegularSinkTagger.scala
index c7b0427a0..41a6cc1bf 100644
--- a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/RegularSinkTagger.scala
+++ b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/RegularSinkTagger.scala
@@ -88,7 +88,9 @@ class RegularSinkTagger(cpg: Cpg, ruleCache: RuleCache) extends PrivadoParallelC
case node => node.code
}).stripPrefix("\"").stripSuffix("\"")
val newRuleIdToUse = ruleInfo.id + "." + cookieName
- ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + cookieName))
+ ruleCache.setRuleInfo(
+ ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + cookieName, isGenerated = true)
+ )
addRuleTags(builder, sink, ruleInfo, ruleCache, Some(newRuleIdToUse))
DatabaseDetailsCache.addDatabaseDetails(
DatabaseDetails(
diff --git a/src/main/scala/ai/privado/languageEngine/php/processor/PhpProcessor.scala b/src/main/scala/ai/privado/languageEngine/php/processor/PhpProcessor.scala
new file mode 100644
index 000000000..13fad4223
--- /dev/null
+++ b/src/main/scala/ai/privado/languageEngine/php/processor/PhpProcessor.scala
@@ -0,0 +1,115 @@
+/*
+ * This file is part of Privado OSS.
+ *
+ * Privado is an open source static code analysis tool to discover data flows in the code.
+ * Copyright (C) 2022 Privado, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * For more information, contact support@privado.ai
+ *
+ */
+
+package ai.privado.languageEngine.php.processor
+
+import ai.privado.cache.*
+import ai.privado.entrypoint.{PrivadoInput, TimeMetric}
+import ai.privado.languageEngine.base.processor.BaseProcessor
+import ai.privado.languageEngine.php.semantic.Language.tagger
+import ai.privado.model.Constants.*
+import ai.privado.model.Language.Language
+import ai.privado.utility.Utilities.createCpgFolder
+import io.joern.php2cpg.{Config, Php2Cpg}
+import io.joern.x2cpg.X2Cpg.applyDefaultOverlays
+import io.shiftleft.codepropertygraph.generated.Cpg
+import io.shiftleft.passes.CpgPassBase
+import org.slf4j.{Logger, LoggerFactory}
+
+import java.io.File
+import java.nio.file.Paths
+import java.util.Calendar
+
+/** Processor for PHP repositories: builds the base CPG with php2cpg, applies the default overlays and then hands
+  * the result to `tagAndExport`.
+  */
+class PhpProcessor(
+  ruleCache: RuleCache,
+  privadoInput: PrivadoInput,
+  sourceRepoLocation: String,
+  lang: Language,
+  dataFlowCache: DataFlowCache,
+  auditCache: AuditCache,
+  s3DatabaseDetailsCache: S3DatabaseDetailsCache
+) extends BaseProcessor(
+      ruleCache,
+      privadoInput,
+      sourceRepoLocation,
+      lang,
+      dataFlowCache,
+      auditCache,
+      s3DatabaseDetailsCache
+    ) {
+
+  override val logger: Logger = LoggerFactory.getLogger(this.getClass)
+
+  /** PHP registers no additional Privado passes; an empty list is returned. */
+  override def applyPrivadoPasses(cpg: Cpg): List[CpgPassBase] = Nil
+
+  /** Runs the PHP tagger (made available on `Cpg` through the imported implicit `tagger` conversion). */
+  override def runPrivadoTagger(cpg: Cpg, taggerCache: TaggerCache): Unit = {
+    cpg.runTagger(ruleCache, taggerCache, privadoInput, dataFlowCache)
+  }
+
+  /** Runs the base-class passes first, then every php2cpg post-processing pass. */
+  override def applyDataflowAndPostProcessingPasses(cpg: Cpg): Unit = {
+    super.applyDataflowAndPostProcessingPasses(cpg)
+    for (postProcessingPass <- Php2Cpg.postProcessingPasses(cpg)) postProcessingPass.createAndApply()
+  }
+
+  /** Creates the CPG for `sourceRepoLocation` and forwards it to `tagAndExport`.
+    *
+    * @return
+    *   whatever `tagAndExport` returns for the created CPG
+    */
+  override def processCpg(): Either[String, Unit] = {
+    println(s"${Calendar.getInstance().getTime} - Processing source code using $lang engine")
+
+    createCpgFolder(sourceRepoLocation)
+
+    // The CPG is written below <repo>/<outputDirectoryName>/<cpgOutputFileName>.
+    val outputFile = Paths.get(sourceRepoLocation, outputDirectoryName, cpgOutputFileName).toString
+    val phpConfig = Config()
+      .withInputPath(sourceRepoLocation)
+      .withOutputPath(outputFile)
+      .withIgnoredFilesRegex(ruleCache.getExclusionRegex)
+      .withPhpParserBin(PhpProcessor.parserBinPath)
+
+    val cpgWithOverlays = new Php2Cpg().createCpg(phpConfig).map { baseCpg =>
+      println(
+        s"${TimeMetric.getNewTime()} - Base processing done in \t\t\t\t- ${TimeMetric.setNewTimeToLastAndGetTimeDiff()}"
+      )
+
+      applyDefaultOverlays(baseCpg)
+      baseCpg
+    }
+
+    tagAndExport(cpgWithOverlays)
+  }
+}
+
+object PhpProcessor {
+
+  /** Absolute path to `bin/php-parser/php-parser.php`, resolved relative to the location this class was loaded
+    * from.
+    *
+    * The code-source location is cut at the last occurrence of `lib` or, failing that, of `target`. When neither
+    * occurs, the current working directory (`.`) is used as the base directory.
+    */
+  val parserBinPath: String = {
+    val codeSource = getClass.getProtectionDomain.getCodeSource.getLocation
+
+    // Go through URI -> Path so that percent-encoded characters (e.g. a space encoded as %20) are decoded and the
+    // "file:" scheme is removed properly on every platform. If the URL cannot be converted, fall back to
+    // stripping the scheme from the raw string so the lookup stays best-effort instead of failing at class init.
+    val location = scala.util
+      .Try(Paths.get(codeSource.toURI).toString)
+      .getOrElse(codeSource.toString.stripPrefix("file:"))
+
+    // First marker found wins: "lib" is tried before "target".
+    val baseDir = List("lib", "target").iterator
+      .map(marker => location.lastIndexOf(marker))
+      .find(_ != -1)
+      .map(cut => new File(location.substring(0, cut)).toString)
+      .getOrElse(".")
+
+    Paths.get(baseDir, "bin", "php-parser", "php-parser.php").toAbsolutePath.toString
+  }
+}
diff --git a/src/main/scala/ai/privado/languageEngine/php/semantic/Language.scala b/src/main/scala/ai/privado/languageEngine/php/semantic/Language.scala
new file mode 100644
index 000000000..283e89171
--- /dev/null
+++ b/src/main/scala/ai/privado/languageEngine/php/semantic/Language.scala
@@ -0,0 +1,31 @@
+/*
+ * This file is part of Privado OSS.
+ *
+ * Privado is an open source static code analysis tool to discover data flows in the code.
+ * Copyright (C) 2022 Privado, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * For more information, contact support@privado.ai
+ *
+ */
+
+package ai.privado.languageEngine.php.semantic
+
+import ai.privado.languageEngine.php.tagger.PrivadoTagger
+import io.shiftleft.codepropertygraph.generated.Cpg
+
+/** Implicit conversions for the PHP language engine. */
+object Language {
+
+  /** Wraps a `Cpg` in the PHP `PrivadoTagger`, so tagger methods can be called directly on the `Cpg`. */
+  implicit def tagger(cpg: Cpg): PrivadoTagger = {
+    val phpTagger = new PrivadoTagger(cpg)
+    phpTagger
+  }
+}
diff --git a/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala
new file mode 100644
index 000000000..2ab1e0507
--- /dev/null
+++ b/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala
@@ -0,0 +1,56 @@
+/*
+ * This file is part of Privado OSS.
+ *
+ * Privado is an open source static code analysis tool to discover data flows in the code.
+ * Copyright (C) 2022 Privado, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * For more information, contact support@privado.ai
+ *
+ */
+
+package ai.privado.languageEngine.php.tagger
+
+import ai.privado.cache.{DataFlowCache, RuleCache, TaggerCache}
+import ai.privado.entrypoint.PrivadoInput
+import ai.privado.languageEngine.php.tagger.source.IdentifierTagger
+import ai.privado.tagger.PrivadoBaseTagger
+import ai.privado.tagger.sink.RegularSinkTagger
+import ai.privado.tagger.source.LiteralTagger
+import io.shiftleft.codepropertygraph.generated.Cpg
+import io.shiftleft.codepropertygraph.generated.nodes.Tag
+import io.shiftleft.semanticcpg.language.*
+import org.slf4j.LoggerFactory
+import overflowdb.traversal.Traversal
+
+/** Tagger entry point for the PHP language engine; runs the tagging passes over the wrapped CPG. */
+class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger {
+ private val logger = LoggerFactory.getLogger(this.getClass)
+
+ /** Runs the literal, identifier and regular-sink taggers, in that order, and returns the tags of the CPG.
+ *
+ * `privadoInputConfig` and `dataFlowCache` are accepted but not used by this implementation.
+ *
+ * @param rules
+ * rule cache handed to every tagger pass
+ * @param taggerCache
+ * cache handed to the identifier tagger
+ * @return
+ * a traversal over all tag nodes of the CPG (`cpg.tag`)
+ */
+ override def runTagger(
+ rules: RuleCache,
+ taggerCache: TaggerCache,
+ privadoInputConfig: PrivadoInput,
+ dataFlowCache: DataFlowCache
+ ): Traversal[Tag] = {
+ logger.info("Beginning tagging")
+
+ // Each pass is constructed and applied before the next one is created; keep this sequence.
+ new LiteralTagger(cpg, rules).createAndApply()
+ new IdentifierTagger(cpg, rules, taggerCache).createAndApply()
+ new RegularSinkTagger(cpg, rules).createAndApply()
+
+ logger.info("Finished tagging")
+ cpg.tag
+ }
+}
diff --git a/src/main/scala/ai/privado/languageEngine/php/tagger/source/IdentifierTagger.scala b/src/main/scala/ai/privado/languageEngine/php/tagger/source/IdentifierTagger.scala
new file mode 100644
index 000000000..e8cd8e99c
--- /dev/null
+++ b/src/main/scala/ai/privado/languageEngine/php/tagger/source/IdentifierTagger.scala
@@ -0,0 +1,200 @@
+/*
+ * This file is part of Privado OSS.
+ *
+ * Privado is an open source static code analysis tool to discover data flows in the code.
+ * Copyright (C) 2022 Privado, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * For more information, contact support@privado.ai
+ *
+ */
+
+package ai.privado.languageEngine.php.tagger.source
+
+import ai.privado.cache.{RuleCache, TaggerCache}
+import ai.privado.languageEngine.java.tagger.source.Utility.{
+ getCallsMatchingReturnRegex,
+ getFieldAccessCallsMatchingRegex
+}
+import ai.privado.model.{CatLevelOne, Constants, InternalTag, RuleInfo}
+import ai.privado.tagger.PrivadoParallelCpgPass
+import ai.privado.utility.Utilities.{addRuleTags, storeForTag}
+import io.shiftleft.codepropertygraph.generated.Cpg
+import io.shiftleft.semanticcpg.language.*
+import overflowdb.BatchedUpdate
+
+import java.util.UUID
+
+/** Source tagger for the PHP engine.
+  *
+  * For every source rule it tags identifiers, field-access calls and members whose names match the rule pattern,
+  * and then tags the objects (identifiers and parameters) whose type declares a matching member.
+  */
+class IdentifierTagger(cpg: Cpg, ruleCache: RuleCache, taggerCache: TaggerCache)
+    extends PrivadoParallelCpgPass[RuleInfo](cpg) {
+
+  // Random ids, generated lazily once per tagger instance, used to build the names/values of the derived tags.
+  lazy val RANDOM_ID_OBJECT_OF_TYPE_DECL_HAVING_MEMBER_NAME: String = UUID.randomUUID.toString
+  lazy val RANDOM_ID_OBJECT_OF_TYPE_DECL_HAVING_MEMBER_TYPE: String = UUID.randomUUID.toString
+  lazy val RANDOM_ID_OBJECT_OF_TYPE_DECL_EXTENDING_TYPE: String     = UUID.randomUUID.toString
+
+  /** One work item per source rule. */
+  override def generateParts(): Array[RuleInfo] = ruleCache.getRule.sources.toArray
+
+  /** Tags everything that matches the rule's combined pattern by name, then the derived objects.
+    *
+    * @param builder
+    *   diff graph that collects the tag changes
+    * @param ruleInfo
+    *   source rule being processed
+    */
+  override def runOnPart(builder: DiffGraphBuilder, ruleInfo: RuleInfo): Unit = {
+    val rulePattern = ruleInfo.combinedRulePattern
+
+    val regexMatchingIdentifiers = cpg.identifier(rulePattern).l
+    regexMatchingIdentifiers.foreach(identifier => {
+      storeForTag(builder, identifier, ruleCache)(InternalTag.VARIABLE_REGEX_IDENTIFIER.toString)
+      addRuleTags(builder, identifier, ruleInfo, ruleCache)
+    })
+
+    val regexMatchingFieldIdentifiersIdentifiers =
+      cpg.fieldAccess.where(_.fieldIdentifier.canonicalName(rulePattern)).isCall.l
+    regexMatchingFieldIdentifiersIdentifiers.foreach(identifier => {
+      storeForTag(builder, identifier, ruleCache)(InternalTag.VARIABLE_REGEX_IDENTIFIER.toString)
+      addRuleTags(builder, identifier, ruleInfo, ruleCache)
+    })
+
+    val regexMatchingMembers = cpg.member.name(rulePattern).l
+    regexMatchingMembers.foreach(member => {
+      storeForTag(builder, member, ruleCache)(InternalTag.VARIABLE_REGEX_MEMBER.toString)
+      addRuleTags(builder, member, ruleInfo, ruleCache)
+    })
+
+    tagObjectOfTypeDeclHavingMemberName(builder, rulePattern, ruleInfo)
+  }
+
+  /** Step 2.1: tags identifiers and parameters whose type is a type declaration that has a member whose name
+    * matches `memberNameRegex`. Afterwards runs step 2.2 for every such type declaration.
+    *
+    * @param builder
+    *   diff graph that collects the tag changes
+    * @param memberNameRegex
+    *   regular expression matched against member names
+    * @param ruleInfo
+    *   source rule being processed
+    */
+  private def tagObjectOfTypeDeclHavingMemberName(
+    builder: BatchedUpdate.DiffGraphBuilder,
+    memberNameRegex: String,
+    ruleInfo: RuleInfo
+  ): Unit = {
+    // A type declaration qualifies when at least one matching member is not written entirely in upper case.
+    // The member list stored next to it is NOT filtered by case.
+    val typeDeclsWithMatchingMembers = cpg.typeDecl
+      .where(_.member.name(memberNameRegex).filterNot(item => item.name.equals(item.name.toUpperCase)))
+      .map(typeDeclNode => (typeDeclNode, typeDeclNode.member.name(memberNameRegex).l))
+      .l
+      .distinctBy(_._1.fullName)
+
+    val derivedTagName =
+      Constants.privadoDerived + Constants.underScore + RANDOM_ID_OBJECT_OF_TYPE_DECL_HAVING_MEMBER_NAME
+
+    typeDeclsWithMatchingMembers.foreach { case (typeDeclNode, matchingMembers) =>
+      val typeDeclFullName = typeDeclNode.fullName
+
+      // Parameters are tagged as well, as in collection points sometimes there is no referencing identifier
+      // present for a local.
+      // The full name is compared exactly: it is a literal name, not a pattern, and PHP namespaced names contain
+      // backslashes, which would be interpreted as escapes if the name were used as a regular expression.
+      val impactedObjects =
+        cpg.identifier.typeFullNameExact(typeDeclFullName).l ::: cpg.parameter.typeFullNameExact(typeDeclFullName).l
+
+      matchingMembers.foreach { typeDeclMember =>
+        // updating cache
+        taggerCache.addItemToTypeDeclMemberCache(typeDeclFullName, ruleInfo.id, typeDeclMember)
+        val typeDeclMemberName = typeDeclMember.name
+
+        impactedObjects.foreach { impactedObject =>
+          // Only objects that carry no id tag in the graph yet receive the derived-source id and category.
+          if (impactedObject.tag.nameExact(Constants.id).l.isEmpty) {
+            storeForTag(builder, impactedObject, ruleCache)(
+              InternalTag.OBJECT_OF_SENSITIVE_CLASS_BY_MEMBER_NAME.toString,
+              ruleInfo.id
+            )
+            storeForTag(builder, impactedObject, ruleCache)(Constants.id, derivedTagName)
+            storeForTag(builder, impactedObject, ruleCache)(Constants.catLevelOne, CatLevelOne.DERIVED_SOURCES.name)
+          }
+          storeForTag(builder, impactedObject, ruleCache)(derivedTagName, ruleInfo.id)
+          // Tag for storing memberName in derived Objects -> user --> (email, password)
+          storeForTag(builder, impactedObject, ruleCache)(
+            ruleInfo.id + Constants.underScore + derivedTagName,
+            typeDeclMemberName
+          )
+        }
+
+        // To mark all field accesses and getters
+        tagAllFieldAccessAndGetters(builder, typeDeclFullName, ruleInfo, typeDeclMemberName)
+      }
+    }
+
+    // Step 2.2
+    typeDeclsWithMatchingMembers.foreach { case (typeDeclNode, _) =>
+      tagObjectOfTypeDeclHavingMemberType(builder, typeDeclNode.fullName, ruleInfo)
+    }
+  }
+
+  /** Step 2.2: tags identifiers and parameters whose type is a type declaration that has a member of type
+    * `memberType`.
+    *
+    * @param builder
+    *   diff graph that collects the tag changes
+    * @param memberType
+    *   full name of the member type to look for (compared exactly)
+    * @param ruleInfo
+    *   source rule being processed
+    */
+  private def tagObjectOfTypeDeclHavingMemberType(
+    builder: BatchedUpdate.DiffGraphBuilder,
+    memberType: String,
+    ruleInfo: RuleInfo
+  ): Unit = {
+    val derivedTagName =
+      Constants.privadoDerived + Constants.underScore + RANDOM_ID_OBJECT_OF_TYPE_DECL_HAVING_MEMBER_TYPE
+
+    // stores tuple(Member, TypeDeclFullName)
+    val typeDeclHavingMemberTypeTuple =
+      cpg.typeDecl.member.typeFullNameExact(memberType).map(member => (member, member.typeDecl.fullName)).dedup.l
+
+    typeDeclHavingMemberTypeTuple.foreach { case (typeDeclMember, typeDeclVal) =>
+      taggerCache.addItemToTypeDeclMemberCache(typeDeclVal, ruleInfo.id, typeDeclMember)
+
+      // Exact comparison for the same reason as in step 2.1: the value is a literal full name.
+      val impactedObjects =
+        cpg.identifier.typeFullNameExact(typeDeclVal).l ::: cpg.parameter.typeFullNameExact(typeDeclVal).l
+
+      impactedObjects.foreach { impactedObject =>
+        if (impactedObject.tag.nameExact(Constants.id).l.isEmpty) {
+          storeForTag(builder, impactedObject, ruleCache)(InternalTag.OBJECT_OF_SENSITIVE_CLASS_BY_MEMBER_TYPE.toString)
+          storeForTag(builder, impactedObject, ruleCache)(Constants.id, derivedTagName)
+          storeForTag(builder, impactedObject, ruleCache)(Constants.catLevelOne, CatLevelOne.DERIVED_SOURCES.name)
+        }
+        storeForTag(builder, impactedObject, ruleCache)(derivedTagName, ruleInfo.id)
+      }
+
+      // To mark all field accesses and getters
+      tagAllFieldAccessAndGetters(builder, typeDeclVal, ruleInfo, typeDeclMember.name)
+    }
+  }
+
+  /** Tags the field-access calls and the return-matching calls that the shared Java helpers find for the given
+    * type declaration and member.
+    *
+    * @param builder
+    *   diff graph that collects the tag changes
+    * @param typeDeclVal
+    *   full name of the type declaration
+    * @param ruleInfo
+    *   source rule being processed
+    * @param typeDeclMemberName
+    *   name of the member; it is wrapped in parentheses and passed to the helpers as the pattern
+    */
+  private def tagAllFieldAccessAndGetters(
+    builder: BatchedUpdate.DiffGraphBuilder,
+    typeDeclVal: String,
+    ruleInfo: RuleInfo,
+    typeDeclMemberName: String
+  ): Unit = {
+    // NOTE(review): the helpers are defined elsewhere; if they treat `typeDeclVal` as a regular expression, PHP
+    // names containing backslashes may need quoting there as well - confirm.
+    val memberPattern = s"($typeDeclMemberName)"
+
+    // Accesses whose code is entirely upper case are skipped.
+    val impactedGetters = getFieldAccessCallsMatchingRegex(cpg, typeDeclVal, memberPattern)
+      .filterNot(item => item.code.equals(item.code.toUpperCase))
+
+    impactedGetters.foreach(impactedGetter => {
+      storeForTag(builder, impactedGetter, ruleCache)(InternalTag.SENSITIVE_FIELD_ACCESS.toString)
+      addRuleTags(builder, impactedGetter, ruleInfo, ruleCache)
+    })
+
+    val impactedReturnMethods = getCallsMatchingReturnRegex(cpg, typeDeclVal, memberPattern)
+    impactedReturnMethods
+      .foreach(storeForTag(builder, _, ruleCache)(InternalTag.SENSITIVE_METHOD_RETURN.toString, ruleInfo.id))
+  }
+}
diff --git a/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPITagger.scala b/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPITagger.scala
index 2596cdcf4..71e681608 100644
--- a/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPITagger.scala
+++ b/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPITagger.scala
@@ -56,6 +56,7 @@ class PythonAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput
override def generateParts(): Array[_ <: AnyRef] = {
ruleCache.getRule.sinks
.filter(rule => rule.nodeType.equals(NodeType.API))
+ .filterNot(_.isGenerated) // Filter out generated rules, we only need to use the passed rules
.toArray
}
diff --git a/src/main/scala/ai/privado/languageEngine/ruby/tagger/sink/APITagger.scala b/src/main/scala/ai/privado/languageEngine/ruby/tagger/sink/APITagger.scala
index 28d8d971d..1e2af2eb9 100644
--- a/src/main/scala/ai/privado/languageEngine/ruby/tagger/sink/APITagger.scala
+++ b/src/main/scala/ai/privado/languageEngine/ruby/tagger/sink/APITagger.scala
@@ -47,6 +47,7 @@ class APITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput)
override def generateParts(): Array[_ <: AnyRef] = {
ruleCache.getRule.sinks
.filter(rule => rule.nodeType.equals(NodeType.API))
+ .filterNot(_.isGenerated) // Filter out generated rules, we only need to use the passed rules
.toArray
}
diff --git a/src/main/scala/ai/privado/model/Config.scala b/src/main/scala/ai/privado/model/Config.scala
index fa419e23b..5d4a00074 100644
--- a/src/main/scala/ai/privado/model/Config.scala
+++ b/src/main/scala/ai/privado/model/Config.scala
@@ -42,7 +42,8 @@ case class RuleInfo(
catLevelOne: CatLevelOne.CatLevelOne,
catLevelTwo: String,
language: Language.Language,
- categoryTree: Array[String]
+ categoryTree: Array[String],
+ isGenerated: Boolean = false // mark this true, if the rule is generated by privado-core
) {
def combinedRulePattern: String = {
patterns.mkString("(", "|", ")")
diff --git a/src/main/scala/ai/privado/model/Constants.scala b/src/main/scala/ai/privado/model/Constants.scala
index bc317e499..2e4acc5f5 100644
--- a/src/main/scala/ai/privado/model/Constants.scala
+++ b/src/main/scala/ai/privado/model/Constants.scala
@@ -54,6 +54,7 @@ object Constants {
val value = "value"
val MAX_SOCKET_COUNT = "maxSocketCount"
val RepoPropertyConfig = "RepoPropertyConfig"
+ val filePath = "filePath"
val third_parties = "third_parties"
val internal_apis = "internal_apis"
@@ -157,6 +158,7 @@ object Constants {
val cookieSourceRuleId = "Data.Sensitive.OnlineIdentifiers.Cookies"
val ignoredSinks = "ignoredSinks"
val apiSinks = "apiSinks"
+ val apiMethodFullNames = "apiMethodFullNames"
val apiHttpLibraries = "apiHttpLibraries"
val apiIdentifier = "apiIdentifier"
val apiGraphqlLibraries = "apiGraphqlLibraries"
diff --git a/src/main/scala/ai/privado/model/PrivadoTag.scala b/src/main/scala/ai/privado/model/PrivadoTag.scala
index b3d73415f..2beb744c2 100644
--- a/src/main/scala/ai/privado/model/PrivadoTag.scala
+++ b/src/main/scala/ai/privado/model/PrivadoTag.scala
@@ -45,6 +45,10 @@ object InternalTag extends Enumeration {
val PROBABLE_ASSET = Value("PROBABLE_ASSET")
val SOURCE_PROPERTY = Value("SOURCE_PROPERTY")
+ // API Tags
+ val API_SINK_MARKED = Value("API_SINK_MARKED")
+ val API_URL_MARKED = Value("API_URL_MARKED")
+
lazy val valuesAsString = InternalTag.values.map(value => value.toString())
}
@@ -99,6 +103,7 @@ object Language extends Enumeration {
val RUBY = Value("ruby")
val KOTLIN = Value("kotlin")
val GO = Value("go")
+ val PHP = Value("php")
val CSHARP = Value("csharp")
val DEFAULT = Value("default")
val UNKNOWN = Value("unknown")
diff --git a/src/main/scala/ai/privado/passes/DBTParserPass.scala b/src/main/scala/ai/privado/passes/DBTParserPass.scala
index fd0ccc625..de57cba7a 100644
--- a/src/main/scala/ai/privado/passes/DBTParserPass.scala
+++ b/src/main/scala/ai/privado/passes/DBTParserPass.scala
@@ -171,7 +171,8 @@ class DBTParserPass(cpg: Cpg, projectRoot: String, ruleCache: RuleCache) extends
CatLevelOne.SINKS,
"storages",
Language.DEFAULT,
- Array[String]()
+ Array[String](),
+ true
)
val dbDetails = DatabaseDetails(dbName, dbPlatform, dbHost, "", "", Some(schema))
diff --git a/src/main/scala/ai/privado/passes/PropertyParserPass.scala b/src/main/scala/ai/privado/passes/PropertyParserPass.scala
index 2d9e66729..bc2081244 100644
--- a/src/main/scala/ai/privado/passes/PropertyParserPass.scala
+++ b/src/main/scala/ai/privado/passes/PropertyParserPass.scala
@@ -4,6 +4,7 @@ import io.shiftleft.codepropertygraph.generated.EdgeTypes
import io.shiftleft.codepropertygraph.generated.nodes.NewJavaProperty
import overflowdb.BatchedUpdate
import ai.privado.cache.RuleCache
+import ai.privado.entrypoint.PrivadoInput
import io.joern.x2cpg.SourceFiles
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.NewFile
@@ -45,8 +46,13 @@ object FileExtensions {
val CONF = ".conf"
}
-class PropertyParserPass(cpg: Cpg, projectRoot: String, ruleCache: RuleCache, language: Language.Value)
- extends PrivadoParallelCpgPass[String](cpg) {
+class PropertyParserPass(
+ cpg: Cpg,
+ projectRoot: String,
+ ruleCache: RuleCache,
+ language: Language.Value,
+ privadoInput: PrivadoInput = PrivadoInput()
+) extends PrivadoParallelCpgPass[String](cpg) {
val PLACEHOLDER_TOKEN_START_END = "@@"
val logger = LoggerFactory.getLogger(getClass)
@@ -65,10 +71,14 @@ class PropertyParserPass(cpg: Cpg, projectRoot: String, ruleCache: RuleCache, la
).toArray
}
case Language.JAVASCRIPT =>
- configFiles(
- projectRoot,
- Set(FileExtensions.JSON, FileExtensions.ENV, FileExtensions.YML, FileExtensions.YAML)
- ).toArray
+ if (privadoInput.enableIngressAndEgressUrls) {
+ configFiles(
+ projectRoot,
+ Set(FileExtensions.JSON, FileExtensions.ENV, FileExtensions.YAML, FileExtensions.YML)
+ ).toArray
+ } else {
+ configFiles(projectRoot, Set(FileExtensions.JSON, FileExtensions.ENV)).toArray
+ }
case Language.PYTHON =>
configFiles(
projectRoot,
diff --git a/src/main/scala/ai/privado/tagger/sink/APITagger.scala b/src/main/scala/ai/privado/tagger/sink/APITagger.scala
index 6655263b3..2a85f85ff 100644
--- a/src/main/scala/ai/privado/tagger/sink/APITagger.scala
+++ b/src/main/scala/ai/privado/tagger/sink/APITagger.scala
@@ -58,6 +58,7 @@ class APITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput)
override def generateParts(): Array[_ <: AnyRef] = {
ruleCache.getRule.sinks
.filter(rule => rule.nodeType.equals(NodeType.API))
+ .filterNot(_.isGenerated) // Filter out generated rules, we only need to use the passed rules
.toArray
}
override def runOnPart(builder: DiffGraphBuilder, ruleInfo: RuleInfo): Unit = {
diff --git a/src/main/scala/ai/privado/tagger/sink/api/APISinkTagger.scala b/src/main/scala/ai/privado/tagger/sink/api/APISinkTagger.scala
new file mode 100644
index 000000000..f99f41950
--- /dev/null
+++ b/src/main/scala/ai/privado/tagger/sink/api/APISinkTagger.scala
@@ -0,0 +1,11 @@
+package ai.privado.tagger.sink.api
+
+import ai.privado.cache.RuleCache
+import ai.privado.entrypoint.PrivadoInput
+import io.shiftleft.codepropertygraph.generated.Cpg
+
+/** Contract for components that apply API sink tagging to a CPG. */
+trait APISinkTagger {
+
+ /** Applies the API sink tagger(s) to `cpg`.
+ *
+ * The default body is `???`, so calling this on an implementation that does not override it throws
+ * `scala.NotImplementedError`.
+ */
+ def applyTagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput): Unit = ???
+
+}
diff --git a/src/main/scala/ai/privado/tagger/utility/APITaggerUtility.scala b/src/main/scala/ai/privado/tagger/utility/APITaggerUtility.scala
index 877f85c3d..3019ddcf0 100644
--- a/src/main/scala/ai/privado/tagger/utility/APITaggerUtility.scala
+++ b/src/main/scala/ai/privado/tagger/utility/APITaggerUtility.scala
@@ -38,8 +38,9 @@ import ai.privado.utility.Utilities.{
}
import io.joern.dataflowengineoss.language.*
import io.joern.dataflowengineoss.queryengine.{EngineConfig, EngineContext}
-import io.shiftleft.codepropertygraph.generated.nodes.{AstNode, CfgNode, Member}
+import io.shiftleft.codepropertygraph.generated.nodes.{AstNode, CfgNode, JavaProperty, Member}
import overflowdb.BatchedUpdate
+import overflowdb.BatchedUpdate.DiffGraphBuilder
object APITaggerUtility {
@@ -48,8 +49,9 @@ object APITaggerUtility {
def getLiteralCode(element: AstNode): String = {
val literalCode = element match {
- case member: Member => member.name
- case _ => element.code.split(" ").last
+ case member: Member => member.name
+ case propertyNode: JavaProperty => propertyNode.value
+ case _ => element.code.split(" ").last
}
element.originalPropertyValue.getOrElse(literalCode)
@@ -83,7 +85,9 @@ object APITaggerUtility {
else {
val domain = resolveDomainFromSource(sourceNode)
newRuleIdToUse = ruleInfo.id + "." + domain
- ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain))
+ ruleCache.setRuleInfo(
+ ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain, isGenerated = true)
+ )
addRuleTags(builder, apiNode, ruleInfo, ruleCache, Some(newRuleIdToUse))
}
storeForTag(builder, apiNode, ruleCache)(Constants.apiUrl + newRuleIdToUse, getLiteralCode(sourceNode))
@@ -110,7 +114,7 @@ object APITaggerUtility {
urlValue.stripPrefix("\"").stripSuffix("\"")
}
- private def resolveDomainFromSource(sourceNode: AstNode): String = {
+ def resolveDomainFromSource(sourceNode: AstNode): String = {
val sourceDomain = sourceNode.originalPropertyValue.getOrElse(getLiteralCode(sourceNode))
if (sourceDomain.matches(SERVICE_URL_REGEX_PATTERN)) {
sourceDomain.split("//").last
@@ -118,4 +122,19 @@ object APITaggerUtility {
getDomainFromString(sourceDomain)
}
}
+
+ /** Registers a domain-specific copy of `ruleInfo` in the rule cache and tags `apiNode` with it.
+   *
+   * The copy gets the id `<ruleInfo.id>.<domain>`, the name `<ruleInfo.name> <domain>` and `isGenerated = true`.
+   * The literal code of `apiUrlNode` is stored on `apiNode` under the tag `Constants.apiUrl + <new id>`.
+   *
+   * @return
+   *   the result of the final `storeForTag` call
+   */
+ def tagAPIWithDomainAndUpdateRuleCache(
+   builder: DiffGraphBuilder,
+   ruleInfo: RuleInfo,
+   ruleCache: RuleCache,
+   domain: String,
+   apiNode: AstNode,
+   apiUrlNode: AstNode
+ ) = {
+   val domainRuleId   = ruleInfo.id + "." + domain
+   val domainRuleInfo = ruleInfo.copy(id = domainRuleId, name = s"${ruleInfo.name} $domain", isGenerated = true)
+
+   // The generated rule must be in the cache before the node is tagged with its id.
+   ruleCache.setRuleInfo(domainRuleInfo)
+   addRuleTags(builder, apiNode, ruleInfo, ruleCache, Some(domainRuleId))
+
+   val apiUrlTagName = Constants.apiUrl + domainRuleId
+   storeForTag(builder, apiNode, ruleCache)(apiUrlTagName, getLiteralCode(apiUrlNode))
+ }
}
diff --git a/src/test/scala/ai/privado/RuleInfoTestData.scala b/src/test/scala/ai/privado/RuleInfoTestData.scala
deleted file mode 100644
index 605a9cbff..000000000
--- a/src/test/scala/ai/privado/RuleInfoTestData.scala
+++ /dev/null
@@ -1,131 +0,0 @@
-package ai.privado
-
-import ai.privado.model.{CatLevelOne, ConfigAndRules, FilterProperty, Language, NodeType, RuleInfo}
-
-object RuleInfoTestData {
-
- val sourceRule = List(
- RuleInfo(
- "Data.Sensitive.FirstName",
- "FirstName",
- "",
- FilterProperty.METHOD_FULL_NAME,
- Array(),
- List("(?i).*firstName|first_name.*"),
- false,
- "",
- Map(),
- NodeType.REGULAR,
- "",
- CatLevelOne.SOURCES,
- "",
- Language.JAVA,
- Array()
- ),
- RuleInfo(
- "Data.Sensitive.AccountData.AccountPassword",
- "AccountPassword",
- "",
- FilterProperty.METHOD_FULL_NAME,
- Array(),
- List("(?i).*password.*"),
- false,
- "",
- Map(),
- NodeType.REGULAR,
- "",
- CatLevelOne.SOURCES,
- "",
- Language.JAVA,
- Array()
- ),
- RuleInfo(
- "Data.Sensitive.PersonalIdentification.LastName",
- "LastName",
- "",
- FilterProperty.METHOD_FULL_NAME,
- Array(),
- List("(?i).*lastName.*"),
- false,
- "",
- Map(),
- NodeType.REGULAR,
- "",
- CatLevelOne.SOURCES,
- "",
- Language.UNKNOWN,
- Array()
- ),
- RuleInfo(
- "Data.Sensitive.PersonalIdentification.DateofBirth",
- "Date of Birth",
- "",
- FilterProperty.METHOD_FULL_NAME,
- Array(),
- List("(?i).*dob.*"),
- false,
- "",
- Map(),
- NodeType.REGULAR,
- "",
- CatLevelOne.SOURCES,
- "",
- Language.UNKNOWN,
- Array()
- ),
- RuleInfo(
- "Data.Sensitive.ContactData.EmailAddress",
- "EmailAddress",
- "",
- FilterProperty.METHOD_FULL_NAME,
- Array(),
- List("(?i).*email.*"),
- true,
- "",
- Map(),
- NodeType.REGULAR,
- "",
- CatLevelOne.SOURCES,
- "",
- Language.UNKNOWN,
- Array()
- ),
- RuleInfo(
- "Data.Sensitive.ContactData.PhoneNumber",
- "Phone",
- "",
- FilterProperty.METHOD_FULL_NAME,
- Array(),
- List("(?i).*phone.*"),
- true,
- "",
- Map(),
- NodeType.REGULAR,
- "",
- CatLevelOne.SOURCES,
- "",
- Language.UNKNOWN,
- Array()
- ),
- RuleInfo(
- "Data.Sensitive.FinancialData.Salary",
- "Salary",
- "",
- FilterProperty.METHOD_FULL_NAME,
- Array(),
- List("(?i).*salary.*"),
- true,
- "",
- Map(),
- NodeType.REGULAR,
- "",
- CatLevelOne.SOURCES,
- "",
- Language.UNKNOWN,
- Array()
- )
- )
-
- val rule: ConfigAndRules =
- ConfigAndRules(sourceRule, List(), List(), List(), List(), List(), List(), List(), List(), List())
-}
diff --git a/src/test/scala/ai/privado/exporter/RepoConfigMetadataExporterTest.scala b/src/test/scala/ai/privado/exporter/RepoConfigMetadataExporterTest.scala
index f775c65ef..9cbfdc29d 100644
--- a/src/test/scala/ai/privado/exporter/RepoConfigMetadataExporterTest.scala
+++ b/src/test/scala/ai/privado/exporter/RepoConfigMetadataExporterTest.scala
@@ -22,12 +22,15 @@ class RepoConfigMetadataExporterTest extends RepoConfigMetadataExporterBase {
"Test Repo config Metadata sample" should {
"should return correct metadata" in {
- val resultMap = RepoConfigMetaDataExporter.getMetaData(cpg, ruleCache).toMap
- resultMap.keys.toList should contain("name")
- resultMap("name") should equal("exampleService")
+ val resultSet = RepoConfigMetaDataExporter.getMetaData(cpg, ruleCache).toArray
+ resultSet.length shouldBe 2
+ resultSet(0)("name") should equal("name")
+ resultSet(0)("value") should equal("exampleService")
+ resultSet(0)("filePath").contains("test.yaml") shouldBe true
- resultMap.keys.toList should contain("config.prod.DB_HOST_NAME")
- resultMap("config.prod.DB_HOST_NAME") should equal("example.com")
+ resultSet(1)("name") should equal("config.prod.DB_HOST_NAME")
+ resultSet(1)("value") should equal("example.com")
+ resultSet(1)("filePath").contains("test.yaml") shouldBe true
}
}
}
diff --git a/src/test/scala/ai/privado/languageEngine/csharp/CSharpTestBase.scala b/src/test/scala/ai/privado/languageEngine/csharp/CSharpTestBase.scala
index 6df07214d..26ca64891 100644
--- a/src/test/scala/ai/privado/languageEngine/csharp/CSharpTestBase.scala
+++ b/src/test/scala/ai/privado/languageEngine/csharp/CSharpTestBase.scala
@@ -1,6 +1,5 @@
package ai.privado.languageEngine.csharp
-import ai.privado.RuleInfoTestData
import ai.privado.cache.{AuditCache, DataFlowCache, RuleCache, TaggerCache}
import ai.privado.entrypoint.PrivadoInput
import ai.privado.model.*
@@ -20,6 +19,7 @@ import io.shiftleft.semanticcpg.layers.*
import io.joern.dataflowengineoss.layers.dataflows.*
import ai.privado.languageEngine.csharp.tagger.source.IdentifierTagger
import ai.privado.model.SourceCodeModel
+import ai.privado.rule.RuleInfoTestData
import ai.privado.tagger.source.LiteralTagger
abstract class CSharpTestBase extends AnyWordSpec with Matchers with BeforeAndAfterAll with BeforeAndAfterEach {
diff --git a/src/test/scala/ai/privado/languageEngine/java/JavaTestBase.scala b/src/test/scala/ai/privado/languageEngine/java/JavaTestBase.scala
new file mode 100644
index 000000000..73b52f34a
--- /dev/null
+++ b/src/test/scala/ai/privado/languageEngine/java/JavaTestBase.scala
@@ -0,0 +1,36 @@
+package ai.privado.languageEngine.java
+
+import ai.privado.languageEngine.ruby.RubyTestBase.code
+import ai.privado.model.SourceCodeModel
+import better.files.File
+import io.joern.dataflowengineoss.layers.dataflows.{OssDataFlow, OssDataFlowOptions}
+import io.joern.javasrc2cpg.{Config, JavaSrc2Cpg}
+import io.joern.x2cpg.X2Cpg.applyDefaultOverlays
+import io.shiftleft.codepropertygraph.generated.Cpg
+import io.shiftleft.semanticcpg.layers.LayerCreatorContext
+
+object JavaTestBase {
+  /** Creates a CPG from the given Java sources and then runs the OSS data-flow layer over it.
+    * NOTE(review): `applyPostProcessingPass` is accepted but never read in this body. */
+  def code(sourceCodes: List[SourceCodeModel], applyPostProcessingPass: Boolean = false): (Cpg, Config) = {
+    val cpgAndConfig  = code(sourceCodes)
+    val dataFlowLayer = new OssDataFlow(new OssDataFlowOptions())
+    dataFlowLayer.run(new LayerCreatorContext(cpgAndConfig._1))
+    cpgAndConfig
+  }
+
+  /** Writes each source model into a fresh temporary directory, runs javasrc2cpg on that directory and
+    * applies the default overlays to the resulting CPG. */
+  private def code(sourceCodes: List[SourceCodeModel]): (Cpg, Config) = {
+    val sourceRoot = File.newTemporaryDirectory()
+    sourceCodes.foreach(model => (sourceRoot / model.fileName).write(model.sourceCode))
+    val cpgFile = File.newTemporaryFile()
+
+    val frontendConfig = Config()
+      .withInputPath(sourceRoot.pathAsString)
+      .withOutputPath(cpgFile.pathAsString)
+    val graph = new JavaSrc2Cpg().createCpg(frontendConfig).get
+    applyDefaultOverlays(graph)
+    (graph, frontendConfig)
+  }
+}
diff --git a/src/test/scala/ai/privado/languageEngine/java/passes/config/PropertiesFilePassTest.scala b/src/test/scala/ai/privado/languageEngine/java/passes/config/PropertiesFilePassTest.scala
index 158487085..f7096d733 100644
--- a/src/test/scala/ai/privado/languageEngine/java/passes/config/PropertiesFilePassTest.scala
+++ b/src/test/scala/ai/privado/languageEngine/java/passes/config/PropertiesFilePassTest.scala
@@ -23,7 +23,7 @@
package ai.privado.languageEngine.java.passes.config
-import ai.privado.cache.RuleCache
+import ai.privado.cache.{AppCache, RuleCache}
import ai.privado.languageEngine.java.language.*
import ai.privado.model.Language
import ai.privado.utility.PropertyParserPass
@@ -43,6 +43,7 @@ class AnnotationTests extends PropertiesFilePassTestBase(".properties") {
"""
|internal.logger.api.base=https://logger.privado.ai/
|slack.base.url=https://hooks.slack.com/services/some/leaking/url
+ |MY_ENDPOINT=http://myservice.com/user
|""".stripMargin
override val propertyFileContents = ""
@@ -60,7 +61,7 @@ class AnnotationTests extends PropertiesFilePassTestBase(".properties") {
|
|public AuthenticationService(UserRepository userr, SessionsR sesr, ModelMapper mapper,
| ObjectMapper objectMapper, @Qualifier("ApiCaller") ExecutorService apiExecutor, SlackStub slackStub,
- | SendGridStub sgStub, @Value("${internal.logger.api.base}") String loggerBaseURL) {
+ | SendGridStub sgStub, @Value("${internal.logger.api.base}") String loggerBaseURL, @Named(Constants.MY_ENDPOINT) String endpoint) {
| }
|
|@Value("${internal.logger.api.base}")
@@ -74,25 +75,28 @@ class AnnotationTests extends PropertiesFilePassTestBase(".properties") {
"ConfigFilePass" should {
"connect annotated parameter to property" in {
val anno: List[AstNode] = cpg.property.usedAt.l
- anno.length shouldBe 3
+ anno.length shouldBe 4
anno.code.l shouldBe List(
"@Value(\"${internal.logger.api.base}\") String loggerBaseURL",
"java.lang.String loggerUrl",
+ "@Named(Constants.MY_ENDPOINT) String endpoint",
"java.lang.String slackWebHookURL"
)
}
"connect property to annotated parameter" in {
- // cpg.property.usedAt.originalProperty.l.length shouldBe 3
+ cpg.property.usedAt.originalProperty.l.length shouldBe 4
cpg.property.usedAt.originalProperty.name.l shouldBe List(
"internal.logger.api.base",
"internal.logger.api.base",
+ "MY_ENDPOINT",
"slack.base.url"
)
cpg.property.usedAt.originalProperty.value.l shouldBe List(
"https://logger.privado.ai/",
"https://logger.privado.ai/",
+ "http://myservice.com/user",
"https://hooks.slack.com/services/some/leaking/url"
)
}
@@ -194,6 +198,25 @@ class EgressPropertyTests extends PropertiesFilePassTestBase(".yaml") {
|spring:
| application:
| name: basepath
+ |false-positive-entries:
+ | urls:
+ | - http:
+ | path1: en-wrapper/0.5.6/maven-wrapper-0.5.6.jar
+ | path2: che-maven/3.6.3/apache-maven-3.6.3-bin.zip
+ | path3: dkr.ecr.us-west-2.amazonaws.com/infrastructure/ecr-pusher:latest
+ | path4: mvn -U -P ${ENVIRONMENT} package -DskipTests --settings ${home}/.m2/settings.xml
+ | path5: somename.jpg
+ | path6: somename.png
+ | path7: somename.gif
+ | path8: string having html tags hello
and world
+ | path9: /a/b/c containing spaces
+ | path10: github.com/a/b/c
+ | pathe11: ../some/file/path
+ | path12: #somecomment
+ | path13: ///a/b/c
+ | path14: ./some/file/path
+ |
+ |
|mx-record-delete:
| events:
| - http:
@@ -208,6 +231,12 @@ class EgressPropertyTests extends PropertiesFilePassTestBase(".yaml") {
| - ssm:
| path: /
| method: PUT
+ | - privado:
+ | path: https://code.privado.ai/repositories
+ | method: PUT
+ | - privado-without-http:
+ | path: code.privado.ai/repositories
+ | method: PUT
|""".stripMargin
override val codeFileContents =
"""
@@ -216,12 +245,14 @@ class EgressPropertyTests extends PropertiesFilePassTestBase(".yaml") {
override val propertyFileContents = ""
- "Fetch egress urls from property files" ignore {
+ "Fetch egress urls from property files" should {
"Check egress urls" in {
- val egressExporter = HttpConnectionMetadataExporter(cpg, new RuleCache)
- val List(url1, url2) = egressExporter.getEgressUrls
+ val egressExporter = HttpConnectionMetadataExporter(cpg, new RuleCache)
+ val List(url1, url2, url3, url4) = egressExporter.getEgressUrls
url1 shouldBe "/v1/student/{id}"
url2 shouldBe "v1/student/{id}"
+ url3 shouldBe "https://code.privado.ai/repositories"
+ url4 shouldBe "code.privado.ai/repositories"
}
"Check egress urls with single char" in {
@@ -338,6 +369,7 @@ abstract class PropertiesFilePassTestBase(fileExtension: String)
new PropertyParserPass(cpg, inputDir.toString(), new RuleCache, Language.JAVA).createAndApply()
new JavaEnvPropertyLinkerPass(cpg).createAndApply()
new JavaAnnotationPropertyLinkerPass(cpg).createAndApply()
+ AppCache.repoLanguage = Language.JAVA
super.beforeAll()
}
diff --git a/src/test/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByMethodFullNameTaggerTest.scala b/src/test/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByMethodFullNameTaggerTest.scala
new file mode 100644
index 000000000..db10fce78
--- /dev/null
+++ b/src/test/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByMethodFullNameTaggerTest.scala
@@ -0,0 +1,74 @@
+package ai.privado.languageEngine.java.tagger.sink.api
+
+import ai.privado.cache.RuleCache
+import ai.privado.entrypoint.PrivadoInput
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+import ai.privado.languageEngine.java.JavaTestBase.*
+import ai.privado.model.{ConfigAndRules, Constants, InternalTag, Language, SourceCodeModel, SystemConfig}
+import ai.privado.rule.RuleInfoTestData
+import io.shiftleft.semanticcpg.language.*
+
+class JavaAPISinkByMethodFullNameTaggerTest extends AnyWordSpec with Matchers with BeforeAndAfterAll {
+ // Checks that JavaAPISinkTagger marks a call when an apiMethodFullNames system config entry is present.
+ "call which match api methodFullName regex" should {
+ "match" in {
+ // Build a CPG from a single Java file that calls HttpURLConnection.getResponseCode().
+ val (cpg, config) = code(
+ List(
+ SourceCodeModel(
+ """
+ |
+ |import java.io.BufferedReader;
+ |import java.io.IOException;
+ |import java.io.InputStreamReader;
+ |import java.net.HttpURLConnection;
+ |import java.net.URL;
+ |
+ |public class HttpRequestExample {
+ | public static void main(String[] args) {
+ | try {
+ | // Specify the URL to send the request to
+ | URL url = new URL("https://jsonplaceholder.typicode.com/posts/1");
+ |
+ | // Open a connection to the URL
+ | HttpURLConnection connection = (HttpURLConnection) url.openConnection();
+ |
+ | // Set request method to GET
+ | connection.setRequestMethod("GET");
+ |
+ | // Get the response code
+ | int responseCode = connection.getResponseCode();
+ | }
+ | }
+ |}
+ |""".stripMargin,
+ "HttpRequestExample.java"
+ )
+ )
+ )
+ // The rule cache holds nothing but one system config entry keyed by Constants.apiMethodFullNames.
+ val ruleCache = RuleCache()
+ ruleCache.setRule(
+ ConfigAndRules(systemConfig =
+ List(
+ SystemConfig(
+ Constants.apiMethodFullNames,
+ "java.net.HttpURLConnection.*",
+ Language.UNKNOWN,
+ "",
+ Array[String]()
+ )
+ )
+ )
+ )
+ JavaAPISinkTagger.applyTagger(cpg, ruleCache = ruleCache, privadoInput = PrivadoInput())
+ // Exactly one API_SINK_MARKED tag is expected across the getResponseCode call(s).
+ val apiSinks = cpg.call("getResponseCode").l
+
+ apiSinks.tag.nameExact(InternalTag.API_SINK_MARKED.toString).size shouldBe 1
+ }
+ }
+
+}
diff --git a/src/test/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByParameterTaggerTest.scala b/src/test/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByParameterTaggerTest.scala
new file mode 100644
index 000000000..90478f3e9
--- /dev/null
+++ b/src/test/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByParameterTaggerTest.scala
@@ -0,0 +1,103 @@
+package ai.privado.languageEngine.java.tagger.sink.api
+
+import ai.privado.cache.RuleCache
+import ai.privado.entrypoint.PrivadoInput
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+import ai.privado.languageEngine.java.JavaTestBase.*
+import ai.privado.languageEngine.java.tagger.sink.JavaAPITagger
+import ai.privado.model.{CatLevelOne, Constants, InternalTag, Language, NodeType, SourceCodeModel, SystemConfig}
+import ai.privado.rule.RuleInfoTestData
+import io.shiftleft.semanticcpg.language.*
+
+class JavaAPISinkByParameterTaggerTest extends AnyWordSpec with Matchers with BeforeAndAfterAll {
+ // Exercises API sink tagging with PrivadoInput(enableAPIByParameter = true) and an apiIdentifier system config.
+ "Api by matching a variable like parameter" should {
+ "be tagged as a API sink" in {
+ // Two Java sources are analysed together: EndpointClient.java and Main.java.
+ val (cpg, config) = code(
+ List(
+ SourceCodeModel(
+ """
+ |import java.util.List;
+ |import ai.privado.client.Client;
+ |
+ |public class EndpointClient {
+ |
+ | private String config;
+ |
+ | public EndpointClient(String config) {
+ | this.config = config;
+ | }
+ |
+ |
+ | public Client getClient(String endpoint) {
+ | // Logic to create and return a client based on the endpoint
+ | Client client = new Client(endpoint);
+ | return client;
+ | }
+ |}
+ |
+ |""".stripMargin,
+ "EndpointClient.java"
+ ),
+ SourceCodeModel(
+ """
+ |import java.util.List;
+ |import ai.privado.client.Client;
+ |
+ |public class Main {
+ | private Client client;
+ |
+ | private EndpointClient endpointClient;
+ |
+ | public Main(String endpoint, String config) {
+ | this.client = new Client();
+ | this.endpointClient = new EndpointClient(config);
+ | }
+ |
+ | public List getAllDetails() {
+ |
+ |
+ | return client.getAllDetails(); // This should be marked as API Sink by url like matching
+ | }
+ |
+ | public List getDetailsByEndpoint() {
+ | String url = "https://www.myproduction.com/user/endpoint";
+ |
+ | return endpointClient.getDetailsByEndpoint(url); // This should be marked as API Sink by config like matching
+ |
+ | }
+ |}
+ |""".stripMargin,
+ "Main.java"
+ )
+ )
+ )
+ // Start from the shared test rules and replace only the systemConfig list.
+ val privadoInput = PrivadoInput(enableAPIByParameter = true)
+ val ruleCache = RuleCache()
+ val systemConfig =
+ List(SystemConfig(Constants.apiIdentifier, "(?i).*endpoint.*", Language.UNKNOWN, "", Array[String]()))
+ ruleCache.setRule(RuleInfoTestData.rule.copy(systemConfig = systemConfig))
+ JavaAPISinkTagger.applyTagger(cpg, ruleCache = ruleCache, privadoInput = privadoInput)
+ // JavaAPITagger runs after the sink tagger, with the same rule cache and input.
+ new JavaAPITagger(cpg, ruleCache, privadoInputConfig = privadoInput).createAndApply()
+ // getAllDetails: expects the sink marker, SINKS category, API node type and the endpoint rule id/url tags.
+ val apiSink = cpg.call("getAllDetails").l
+ apiSink.tag.nameExact(InternalTag.API_SINK_MARKED.toString).size shouldBe 1
+ apiSink.tag.nameExact(Constants.catLevelOne).value.headOption shouldBe Some(CatLevelOne.SINKS.name)
+ apiSink.tag.nameExact(Constants.nodeType).value.headOption shouldBe Some(NodeType.API.toString)
+
+ apiSink.tag.nameExact(Constants.id).value.l shouldBe List("Sinks.ThirdParties.API.endpoint")
+ apiSink.tag.nameExact(Constants.apiUrl + "Sinks.ThirdParties.API.endpoint").value.l shouldBe List("endpoint")
+ // getDetailsByEndpoint: same marker, category and node-type expectations.
+ val apiSinkByEndpoint = cpg.call("getDetailsByEndpoint").l
+ apiSinkByEndpoint.tag.nameExact(InternalTag.API_SINK_MARKED.toString).size shouldBe 1
+ apiSinkByEndpoint.tag.nameExact(Constants.catLevelOne).value.headOption shouldBe Some(CatLevelOne.SINKS.name)
+ apiSinkByEndpoint.tag.nameExact(Constants.nodeType).value.headOption shouldBe Some(NodeType.API.toString)
+ }
+ }
+
+}
diff --git a/src/test/scala/ai/privado/languageEngine/php/PhpTestBase.scala b/src/test/scala/ai/privado/languageEngine/php/PhpTestBase.scala
new file mode 100644
index 000000000..5c0091bef
--- /dev/null
+++ b/src/test/scala/ai/privado/languageEngine/php/PhpTestBase.scala
@@ -0,0 +1,92 @@
+/*
+ * This file is part of Privado OSS.
+ *
+ * Privado is an open source static code analysis tool to discover data flows in the code.
+ * Copyright (C) 2022 Privado, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * For more information, contact support@privado.ai
+ *
+ */
+
+package ai.privado.languageEngine.php
+
+import ai.privado.rule.RuleInfoTestData
+import ai.privado.cache.*
+import ai.privado.entrypoint.PrivadoInput
+import ai.privado.languageEngine.php.processor.PhpProcessor
+import ai.privado.languageEngine.php.tagger.source.IdentifierTagger
+import ai.privado.model.*
+import ai.privado.tagger.source.LiteralTagger
+import ai.privado.threatEngine.ThreatEngineExecutor
+import better.files.File
+import io.joern.php2cpg.{Config, Php2Cpg}
+import io.joern.x2cpg.X2Cpg
+import io.shiftleft.codepropertygraph.generated.Cpg
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}
+
+import scala.collection.mutable
+
+/** Shared fixture for PHP tests: builds a tagged CPG from a PHP snippet and releases it after the suite. */
+abstract class PhpTestBase extends AnyWordSpec with Matchers with BeforeAndAfterAll with BeforeAndAfterEach {
+  private val cpgs        = mutable.ArrayBuffer.empty[Cpg]
+  private val outPutFiles = mutable.ArrayBuffer.empty[File]
+  private val inputDirs   = mutable.ArrayBuffer.empty[File]
+
+  val taggerCache = new TaggerCache()
+
+  val configAndRules: ConfigAndRules =
+    ConfigAndRules(RuleInfoTestData.sourceRule, List(), List(), List(), List(), List(), List(), List(), List(), List())
+
+  /** Writes `code` to a temporary `main.php`, builds its CPG, applies overlays/post-processing passes and the
+    * identifier and literal taggers. Returns the CPG together with a `ThreatEngineExecutor` created over it. */
+  def code(code: String): (Cpg, ThreatEngineExecutor) = {
+    val ruleCache     = new RuleCache()
+    val auditCache    = new AuditCache()
+    val privadoInput  = PrivadoInput()
+    val dataFlowCache = new DataFlowCache(privadoInput, auditCache)
+
+    val inputDir = File.newTemporaryDirectory()
+    inputDirs.addOne(inputDir)
+    (inputDir / "main.php").write(code)
+    val outputFile: File = File.newTemporaryFile()
+    outPutFiles.addOne(outputFile)
+    val config = Config()
+      .withInputPath(inputDir.pathAsString)
+      .withOutputPath(outputFile.pathAsString)
+      .withPhpParserBin(PhpProcessor.parserBinPath)
+
+    ruleCache.setRule(configAndRules)
+    val cpg = new Php2Cpg().createCpg(config).get
+    cpgs.addOne(cpg) // registered right away so afterAll closes it even if a later pass throws
+    AppCache.repoLanguage = Language.PHP
+    X2Cpg.applyDefaultOverlays(cpg)
+    Php2Cpg.postProcessingPasses(cpg).foreach(_.createAndApply())
+    new IdentifierTagger(cpg, ruleCache, taggerCache).createAndApply()
+    new LiteralTagger(cpg, ruleCache).createAndApply()
+    val threatEngine =
+      new ThreatEngineExecutor(cpg, config.inputPath, ruleCache, null, dataFlowCache.getDataflowAfterDedup, privadoInput)
+    (cpg, threatEngine)
+  }
+
+  /** Closes every CPG and deletes every temporary file and directory that `code` created for this suite. */
+  override def afterAll(): Unit = {
+    cpgs.foreach(_.close())
+    (outPutFiles ++ inputDirs).foreach(_.delete(swallowIOExceptions = true))
+    super.afterAll()
+  }
+}
diff --git a/src/test/scala/ai/privado/languageEngine/php/tagger/source/FieldIdentifierTaggingTests.scala b/src/test/scala/ai/privado/languageEngine/php/tagger/source/FieldIdentifierTaggingTests.scala
new file mode 100644
index 000000000..e2f28bc21
--- /dev/null
+++ b/src/test/scala/ai/privado/languageEngine/php/tagger/source/FieldIdentifierTaggingTests.scala
@@ -0,0 +1,54 @@
+/*
+ * This file is part of Privado OSS.
+ *
+ * Privado is an open source static code analysis tool to discover data flows in the code.
+ * Copyright (C) 2022 Privado, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * For more information, contact support@privado.ai
+ *
+ */
+package ai.privado.languageEngine.php.tagger.source
+
+import ai.privado.languageEngine.php.PhpTestBase
+import ai.privado.model.*
+import io.shiftleft.semanticcpg.language.*
+
+class FieldIdentifierTaggingTests extends PhpTestBase {
+ "Field access in code" should {
+ "be tagged as part of identifier tagger" in { // NOTE(review): snippet below seems to lack its "<?php" opening - verify
+ val (cpg, _) = code("""
+ |firstName = "John";
+ | add_phone("phone");
+ | }
+ |
+ | function add_phone($ph) {
+ | //
+ | }
+ |}
+ |?>
+ |""".stripMargin)
+ // Exactly one field access is expected, and it must carry the SOURCES category tag.
+ val List(firstNameField) = cpg.fieldAccess.l
+ firstNameField.code shouldBe "$this->firstName"
+ firstNameField.tag.nameExact(Constants.catLevelOne).value.l shouldBe List(CatLevelOne.SOURCES.name)
+ }
+ }
+}
diff --git a/src/test/scala/ai/privado/languageEngine/php/tagger/source/IdentifierTaggingTest.scala b/src/test/scala/ai/privado/languageEngine/php/tagger/source/IdentifierTaggingTest.scala
new file mode 100644
index 000000000..12c902adf
--- /dev/null
+++ b/src/test/scala/ai/privado/languageEngine/php/tagger/source/IdentifierTaggingTest.scala
@@ -0,0 +1,74 @@
+/*
+ * This file is part of Privado OSS.
+ *
+ * Privado is an open source static code analysis tool to discover data flows in the code.
+ * Copyright (C) 2022 Privado, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * For more information, contact support@privado.ai
+ *
+ */
+package ai.privado.languageEngine.php.tagger.source
+
+import ai.privado.languageEngine.php.PhpTestBase
+import ai.privado.model.*
+import io.shiftleft.semanticcpg.language.*
+
+class IdentifierTaggingTest extends PhpTestBase {
+ "Tagging derived sources" should { // NOTE(review): snippet below seems to lack its "<?php" opening - verify
+ val (cpg, _) = code("""
+ |firstName = $fname;
+ | $this->lastName = $lname;
+ | $this->age = $userAge;
+ | $this->email = $userEmail;
+ | $this->dob = $userDob;
+ | }
+ | }
+ |
+ | $user = new User("a", "b", 1, "c@d.com", "01-01-90");
+ | echo $user->firstName;
+ |?>
+ |
+ |""".stripMargin)
+ // The CPG above is built once, in the enclosing `should` body, and shared by both test cases below.
+ "tag member in a structure" in {
+ cpg.member("firstName").tag.nameExact(Constants.id).value.l shouldBe List("Data.Sensitive.FirstName")
+
+ cpg.member("dob").tag.nameExact(Constants.id).value.l shouldBe List(
+ "Data.Sensitive.PersonalIdentification.DateofBirth"
+ )
+ }
+ // `user` at line 20 is expected to carry the sensitive-class-by-member tag and the DERIVED_SOURCES category.
+ "be tagged as part of identifier tagger" in {
+ val userObj = cpg.identifier("user").lineNumber(20).l
+ userObj.tag
+ .where(_.nameExact(InternalTag.OBJECT_OF_SENSITIVE_CLASS_BY_MEMBER_NAME.toString))
+ .value
+ .head shouldBe "Data.Sensitive.FirstName"
+ userObj.tag.where(_.nameExact(Constants.id)).size shouldBe 1
+ userObj.tag.where(_.nameExact(Constants.catLevelOne)).value.l shouldBe List(CatLevelOne.DERIVED_SOURCES.name)
+ }
+ }
+}
diff --git a/src/test/scala/ai/privado/languageEngine/php/tagger/source/LiteralTaggingTests.scala b/src/test/scala/ai/privado/languageEngine/php/tagger/source/LiteralTaggingTests.scala
new file mode 100644
index 000000000..f62b21773
--- /dev/null
+++ b/src/test/scala/ai/privado/languageEngine/php/tagger/source/LiteralTaggingTests.scala
@@ -0,0 +1,50 @@
+/*
+ * This file is part of Privado OSS.
+ *
+ * Privado is an open source static code analysis tool to discover data flows in the code.
+ * Copyright (C) 2022 Privado, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * For more information, contact support@privado.ai
+ *
+ */
+package ai.privado.languageEngine.php.tagger.source
+
+import ai.privado.languageEngine.php.PhpTestBase
+import ai.privado.model.*
+import io.shiftleft.semanticcpg.language.*
+
+class LiteralTaggingTests extends PhpTestBase {
+ "Literals in code" should {
+ "be tagged as part of LiteralTagger" in { // NOTE(review): the PHP snippet below appears to be empty - verify
+ val (cpg, _) = code("""
+ |
+ |""".stripMargin)
+ // The last literal in the CPG is expected to be "phone" and to carry the SOURCES category tag.
+ val literals = cpg.literal.l
+ literals.last.code shouldBe "\"phone\""
+ literals.last.tag.nameExact(Constants.catLevelOne).value.l shouldBe List(CatLevelOne.SOURCES.name)
+ }
+ }
+}
diff --git a/src/test/scala/ai/privado/languageEngine/ruby/monolith/MonolithTest.scala b/src/test/scala/ai/privado/languageEngine/ruby/monolith/MonolithTest.scala
index 5cd0cb1dd..ac67a85d8 100644
--- a/src/test/scala/ai/privado/languageEngine/ruby/monolith/MonolithTest.scala
+++ b/src/test/scala/ai/privado/languageEngine/ruby/monolith/MonolithTest.scala
@@ -1,6 +1,5 @@
package ai.privado.languageEngine.ruby.monolith
-import ai.privado.RuleInfoTestData
import ai.privado.cache.{AppCache, AuditCache, DataFlowCache, RuleCache, S3DatabaseDetailsCache, TaggerCache}
import ai.privado.dataflow.Dataflow
import ai.privado.entrypoint.PrivadoInput
@@ -9,6 +8,7 @@ import ai.privado.languageEngine.base.processor.BaseProcessor
import ai.privado.languageEngine.go.tagger.source.IdentifierTagger
import ai.privado.languageEngine.ruby.tagger.monolith.MonolithTagger
import ai.privado.model.{Constants, Language}
+import ai.privado.rule.RuleInfoTestData
import ai.privado.utility.PropertyParserPass
import better.files.File
import io.joern.dataflowengineoss.language.Path
diff --git a/src/test/scala/ai/privado/languageEngine/ruby/passes/SchemaParserTest.scala b/src/test/scala/ai/privado/languageEngine/ruby/passes/SchemaParserTest.scala
index e78c8d71a..b643ca432 100644
--- a/src/test/scala/ai/privado/languageEngine/ruby/passes/SchemaParserTest.scala
+++ b/src/test/scala/ai/privado/languageEngine/ruby/passes/SchemaParserTest.scala
@@ -1,6 +1,5 @@
package ai.privado.languageEngine.ruby.passes
-import ai.privado.RuleInfoTestData
import ai.privado.cache.RuleCache
import ai.privado.languageEngine.ruby.RubyTestBase.*
import ai.privado.languageEngine.ruby.passes.SchemaParser
@@ -19,6 +18,7 @@ import org.scalatest.BeforeAndAfterAll
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
import ai.privado.model.SourceCodeModel
+import ai.privado.rule.RuleInfoTestData
class SchemaParserTest extends AnyWordSpec with Matchers with BeforeAndAfterAll {
diff --git a/src/test/scala/ai/privado/languageEngine/ruby/tagger/source/RubyLiteralDerivedTaggerTest.scala b/src/test/scala/ai/privado/languageEngine/ruby/tagger/source/RubyLiteralDerivedTaggerTest.scala
index 63e89d8ee..2107a8ef5 100644
--- a/src/test/scala/ai/privado/languageEngine/ruby/tagger/source/RubyLiteralDerivedTaggerTest.scala
+++ b/src/test/scala/ai/privado/languageEngine/ruby/tagger/source/RubyLiteralDerivedTaggerTest.scala
@@ -1,6 +1,5 @@
package ai.privado.languageEngine.ruby.tagger.source
-import ai.privado.RuleInfoTestData
import ai.privado.cache.RuleCache
import ai.privado.languageEngine.ruby.RubyTestBase.*
import ai.privado.languageEngine.ruby.passes.SchemaParser
@@ -19,6 +18,7 @@ import org.scalatest.BeforeAndAfterAll
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
import ai.privado.model.SourceCodeModel
+import ai.privado.rule.RuleInfoTestData
class RubyLiteralDerivedTaggerTest extends AnyWordSpec with Matchers with BeforeAndAfterAll {
diff --git a/src/test/scala/ai/privado/model/SourceCodeModel.scala b/src/test/scala/ai/privado/model/SourceCodeModel.scala
index 11f52ce26..8b62c3ddd 100644
--- a/src/test/scala/ai/privado/model/SourceCodeModel.scala
+++ b/src/test/scala/ai/privado/model/SourceCodeModel.scala
@@ -1,3 +1,2 @@
package ai.privado.model
-
case class SourceCodeModel(sourceCode: String, fileName: String)
diff --git a/src/test/scala/ai/privado/rule/RuleInfoTestData.scala b/src/test/scala/ai/privado/rule/RuleInfoTestData.scala
new file mode 100644
index 000000000..0f907f151
--- /dev/null
+++ b/src/test/scala/ai/privado/rule/RuleInfoTestData.scala
@@ -0,0 +1,23 @@
+package ai.privado.rule
+
+import ai.privado.model.*
+import ai.privado.rule.SourceRuleTestData._
+import ai.privado.rule.SinkRuleTestData._
+
+/** Rule fixtures shared by the test suites. */
+object RuleInfoTestData {
+  // Source rules, in a fixed order.
+  val sourceRule =
+    firstNameSourceRule ::
+      accountPasswordSourceRule ::
+      lastNameSourceRule ::
+      dobSourceRule ::
+      emailSourceRule ::
+      phoneNumberSourceRule ::
+      salarySourceRule ::
+      Nil
+  // Sink rules: only the third-party API rule.
+  val sinkRule = thirdPartyAPIRule :: Nil
+  // Combined rule set; only `sources` and `sinks` are passed explicitly.
+  val rule: ConfigAndRules = ConfigAndRules(sources = sourceRule, sinks = sinkRule)
+}
diff --git a/src/test/scala/ai/privado/rule/SinkRuleTestData.scala b/src/test/scala/ai/privado/rule/SinkRuleTestData.scala
new file mode 100644
index 000000000..d8c51864b
--- /dev/null
+++ b/src/test/scala/ai/privado/rule/SinkRuleTestData.scala
@@ -0,0 +1,27 @@
+package ai.privado.rule
+
+import ai.privado.model.{CatLevelOne, Constants, FilterProperty, Language, NodeType, RuleInfo}
+
+object SinkRuleTestData {
+
+ val thirdPartyAPIRule = RuleInfo(
+ Constants.thirdPartiesAPIRuleId,
+ "Third Party API",
+ "",
+ FilterProperty.METHOD_FULL_NAME,
+ Array(),
+ List(
+ "((?i)((?:http:|https:|ftp:|ssh:|udp:|wss:){0,1}(\\/){0,2}[a-zA-Z0-9_-][^)\\/(#|,!>\\s]{1,50}\\.(?:com|net|org|de|in|uk|us|io|gov|cn|ml|ai|ly|dev|cloud|me|icu|ru|info|top|tk|tr|cn|ga|cf|nl)).*(?