diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index d3343317a..813e90ed1 100644 --- a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -35,6 +35,6 @@ jobs: distribution: 'temurin' java-version: '17' - name: Run unit test - run: sbt test test:test + run: sbt -J-Xmx4G test test:test - run: echo "Previous step failed because unit test failed." - if: ${{ failure() }} \ No newline at end of file + if: ${{ failure() }} diff --git a/Dockerfile b/Dockerfile index ed9d22cf9..1ab0ec1d8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ ARG VERSION=1.0.0 FROM openjdk:18.0.2.1-jdk-bullseye as build -RUN apt update && apt install -y python3 git curl bash ruby-full +RUN apt update && apt install -y python3 git curl bash ruby-full php RUN ln -sf python3 /usr/bin/python ENV SBT_VERSION 1.7.1 ENV SBT_HOME /usr/local/sbt diff --git a/README.md b/README.md index 59829fb67..6b80aed44 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ Privado Core ============================================= -Branch structure +Branch structure main - This branch will contain the released version of the code. diff --git a/build.sbt b/build.sbt index 021254cc3..14fb59ea3 100644 --- a/build.sbt +++ b/build.sbt @@ -1,4 +1,5 @@ import sbt.Credentials +import better.files.File name := "privado-core" ThisBuild / organization := "ai.privado" @@ -206,12 +207,42 @@ stage := Def Compile / compile := ((Compile / compile) dependsOn dotnetAstGenDlTask).value -// Also remove astgen binaries with clean, e.g., to allow for updating them. 
+// Download php-parser: start +// This is based on how php2cpg vendors the php-parser in Joern +val phpParserVersion = "4.15.7" +val upstreamParserBinName = "php-parser.phar" +val versionedParserBinName = s"php-parser-$phpParserVersion.phar" +val phpParserDlUrl = + s"https://github.com/joernio/PHP-Parser/releases/download/v$phpParserVersion/$upstreamParserBinName" + +Compile / compile := ((Compile / compile) dependsOn phpParseDlTask).value + +lazy val phpParseDlTask = taskKey[Unit]("Download php-parser binaries") +phpParseDlTask := { + val phpBinDir = baseDirectory.value / "bin" / "php-parser" + phpBinDir.mkdirs() + + val downloadedFile = SimpleCache.downloadMaybe(phpParserDlUrl) + IO.copyFile(downloadedFile, phpBinDir / versionedParserBinName) + + File((phpBinDir / "php-parser.php").getPath) + .createFileIfNotExists() + .overwrite(s"") + + val distDir = (Universal / stagingDirectory).value / "bin" / "php-parser" + distDir.mkdirs() + IO.copyDirectory(phpBinDir, distDir) +} +// Download php-parser: end + +// Also remove astgen and php-parser binaries with clean, e.g., to allow for updating them. 
// Sadly, we can't define the bin/ folders globally, // as .value can only be used within a task or setting macro cleanFiles ++= Seq( baseDirectory.value / "bin" / "astgen", - (Universal / stagingDirectory).value / "bin" / "astgen" + (Universal / stagingDirectory).value / "bin" / "astgen", + baseDirectory.value / "bin" / "php-parser", + (Universal / stagingDirectory).value / "bin" / "php-parser" ) ++ astGenBinaryNames.map(fileName => SimpleCache.encodeFile(s"$astGenDlUrl$fileName")) Compile / doc / sources := Seq.empty Compile / packageDoc / publishArtifact := false diff --git a/src/main/scala/ai/privado/entrypoint/CommandParser.scala b/src/main/scala/ai/privado/entrypoint/CommandParser.scala index e1a6877f7..f29cc6022 100644 --- a/src/main/scala/ai/privado/entrypoint/CommandParser.scala +++ b/src/main/scala/ai/privado/entrypoint/CommandParser.scala @@ -43,6 +43,7 @@ case class PrivadoInput( disableReadDataflow: Boolean = false, enableAPIDisplay: Boolean = false, enableLambdaFlows: Boolean = false, + enableAPIByParameter: Boolean = false, ignoreExcludeRules: Boolean = false, ignoreSinkSkipRules: Boolean = false, skipUpload: Boolean = false, @@ -85,6 +86,8 @@ object CommandConstants { val ENABLE_API_DISPLAY_ABBR = "ead" val ENABLE_LAMBDA_FLOWS = "enable-lambda-flows" val ENABLE_LAMBDA_FLOWS_ABBR = "elf" + val ENABLE_API_BY_PARAMETER = "enable-api-by-parameter" + val ENABLE_API_BY_PARAMETER_ABBR = "eabyp" val IGNORE_EXCLUDE_RULES = "ignore-exclude-rules" val IGNORE_EXCLUDE_RULES_ABBR = "ier" val UPLOAD = "upload" @@ -211,6 +214,11 @@ object CommandParser { .optional() .action((_, c) => c.copy(enableLambdaFlows = true)) .text("Enable lambda flows"), + opt[Unit](CommandConstants.ENABLE_API_BY_PARAMETER) + .abbr(CommandConstants.ENABLE_API_BY_PARAMETER_ABBR) + .optional() + .action((_, c) => c.copy(enableAPIByParameter = true)) + .text("Enable API tagging by parameter name match"), opt[Unit](CommandConstants.IGNORE_EXCLUDE_RULES) 
.abbr(CommandConstants.IGNORE_EXCLUDE_RULES_ABBR) .optional() diff --git a/src/main/scala/ai/privado/entrypoint/ScanProcessor.scala b/src/main/scala/ai/privado/entrypoint/ScanProcessor.scala index 6c0d414e5..8740d03a0 100644 --- a/src/main/scala/ai/privado/entrypoint/ScanProcessor.scala +++ b/src/main/scala/ai/privado/entrypoint/ScanProcessor.scala @@ -31,6 +31,7 @@ import ai.privado.languageEngine.ruby.processor.RubyProcessor import ai.privado.languageEngine.default.processor.DefaultProcessor import ai.privado.languageEngine.kotlin.processor.KotlinProcessor import ai.privado.languageEngine.go.processor.GoProcessor +import ai.privado.languageEngine.php.processor.PhpProcessor import ai.privado.metric.MetricHandler import ai.privado.model.Language.Language import ai.privado.model.* @@ -430,6 +431,18 @@ object ScanProcessor extends CommandProcessor { auditCache, s3DatabaseDetailsCache ).processCpg() + case language if language == Languages.PHP => + println(s"${Calendar.getInstance().getTime} - Detected language 'PHP'") + new PhpProcessor( + getProcessedRule(Set(Language.PHP)), + this.config, + sourceRepoLocation, + Language.PHP, + dataFlowCache = getDataflowCache, + auditCache, + s3DatabaseDetailsCache + ) + .processCpg() case _ => if (checkJavaSourceCodePresent(sourceRepoLocation)) { println( diff --git a/src/main/scala/ai/privado/exporter/HttpConnectionMetadataExporter.scala b/src/main/scala/ai/privado/exporter/HttpConnectionMetadataExporter.scala index 17f0f42d7..a5aaf9546 100644 --- a/src/main/scala/ai/privado/exporter/HttpConnectionMetadataExporter.scala +++ b/src/main/scala/ai/privado/exporter/HttpConnectionMetadataExporter.scala @@ -38,6 +38,7 @@ class HttpConnectionMetadataExporter(cpg: Cpg, ruleCache: RuleCache) { private val FEIGN_CLIENT = "FeignClient" private val SPRING_ANNOTATION_ID = "Collections.Annotation.Spring" private val STRING_START_WITH_SLASH = "/.{2,}" + private val STRING_CONTAINS_URL = ".*\\.[a-z]{2,5}/[a-z]{2,}.*" private val 
STRING_CONTAINS_TWO_SLASH = ".*/.*/.*" private val SPRING_APPLICATION_BASE_PATH = "(?i)(server[.]servlet[.]context-path|server[.]servlet[.]contextPath)|(spring[.]application[.]name)" @@ -45,7 +46,12 @@ class HttpConnectionMetadataExporter(cpg: Cpg, ruleCache: RuleCache) { private val ALPHABET = "[a-zA-Z]" private val STRING_WITH_CONSECUTIVE_DOTS_OR_DOT_SLASH_OR_NEWLINE = "(?s).*(\\.\\.|\\./|\n).*" private val ESCAPE_STRING_SLASHES = "(\\\")" - private val IMPORT_REGEX_WITH_SLASHES = "(?s)^(?=.*/)(?!.*/$).*" + // Regex to eliminate pattern ending with file suffix + // Demo: https://regex101.com/r/ojV93D/1 + private val FILE_SUFFIX_REGEX_PATTERN = ".*[.][a-z]{2,5}(\\\")?$" + private val SUFFIX_PATTERN = "^(\\.\\/|\\.\\.|\\/\\/).*" + private val COMMON_FALSE_POSITIVE_EGRESS_PATTERN = + ".*(BEGIN PRIVATE KEY|sha512|googleapis|sha1|amazonaws|github||

| p.name matches configRule).l - propertySources.foreach(p => { - metaData.put(p.name, p.value) - }) + cpg.property + .filter(p => p.name matches configRule) + .l + .map(p => { + HashMap( + Constants.name -> p.name, + Constants.value -> p.value, + Constants.filePath -> p.file.name.headOption.getOrElse("") + ) + }) } catch { - case ex: Exception => println("Error while fetching repo config metadata") + case ex: Exception => { + println("Error while fetching repo config metadata") + List() + } } - metaData } } diff --git a/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaAnnotationPropertyLinkerPass.scala b/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaAnnotationPropertyLinkerPass.scala index 80fb39897..0325c778a 100644 --- a/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaAnnotationPropertyLinkerPass.scala +++ b/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaAnnotationPropertyLinkerPass.scala @@ -10,54 +10,66 @@ class JavaAnnotationPropertyLinkerPass(cpg: Cpg) extends PrivadoParallelCpgPass[ override def generateParts(): Array[_ <: AnyRef] = { cpg.annotation - .nameExact("Value") + .name(".*(Value|Named).*") .filter(_.parameterAssign.nonEmpty) .toArray } override def runOnPart(builder: DiffGraphBuilder, annotation: Annotation): Unit = { - /** List of all parameters annotated with Spring's `Value` annotation, along with the property name. - */ - if (annotation.parameterAssign.code("\\\"\\$\\{.*\\}\\\"").nonEmpty && annotation.parameter.nonEmpty) { - val literalName = annotation.parameterAssign.code.head - val value = Option(literalName.slice(3, literalName.length - 2)).getOrElse("") + if (annotation.name == "Value") { + + /** List of all parameters annotated with Spring's `Value` annotation, along with the property name. 
+ */ + if (annotation.parameterAssign.code("\\\"\\$\\{.*\\}\\\"").nonEmpty && annotation.parameter.nonEmpty) { + val literalName = annotation.parameterAssign.code.head + val value = Option(literalName.slice(3, literalName.length - 2)).getOrElse("") + if (value.nonEmpty) { + cpg.property + .filter(p => p.name == value) + .foreach(p => { + connectEnvProperty(annotation.parameter.head, p, builder) + }) + } + } + + /** List of all members annotated with Spring's `Value` annotation, along with the property name. + */ + if (annotation.member.nonEmpty) { + cpg.property + .filter(p => + p.name == Option( + annotation.parameterAssign.head.code.slice(3, annotation.parameterAssign.head.code.length - 2) + ).getOrElse("") + ) + .foreach(p => { + connectEnvProperty(annotation.member.head, p, builder) + }) + } + + /** List of all methods annotated with Spring's `Value` annotation, along with the method node + */ + if (annotation.method.nonEmpty) { + val key = annotation.parameterAssign.head + cpg.property + .filter(p => p.name == Option(key.code.slice(3, key.code.length - 2)).getOrElse("")) + .foreach(p => { + val referenceMember = annotation.method.head.ast.fieldAccess.referencedMember.l.headOption.orNull + if (referenceMember != null) { + connectEnvProperty(referenceMember, p, builder) + } + }) + } + } else if (annotation.name == "Named" && annotation.parameter.nonEmpty) { + val value = annotation.parameterAssign.code.head.split("[.]").lastOption.getOrElse("") if (value.nonEmpty) { cpg.property - .filter(p => p.name == value) + .filter(p => p.name.endsWith(value)) .foreach(p => { connectEnvProperty(annotation.parameter.head, p, builder) }) } } - - /** List of all parameters annotated with Spring's `Value` annotation, along with the property name. 
- */ - if (annotation.member.nonEmpty) { - cpg.property - .filter(p => - p.name == Option( - annotation.parameterAssign.head.code.slice(3, annotation.parameterAssign.head.code.length - 2) - ).getOrElse("") - ) - .foreach(p => { - connectEnvProperty(annotation.member.head, p, builder) - }) - } - - /** List of all methods annotated with Spring's `Value` annotation, along with the method node - */ - if (annotation.method.nonEmpty) { - val key = annotation.parameterAssign.head - cpg.property - .filter(p => p.name == Option(key.code.slice(3, key.code.length - 2)).getOrElse("")) - .foreach(p => { - val referenceMember = annotation.method.head.ast.fieldAccess.referencedMember.l.headOption.orNull - if (referenceMember != null) { - connectEnvProperty(referenceMember, p, builder) - } - }) - } } def connectEnvProperty(literalNode: AstNode, propertyNode: JavaProperty, builder: DiffGraphBuilder): Unit = { @@ -65,3 +77,15 @@ class JavaAnnotationPropertyLinkerPass(cpg: Cpg) extends PrivadoParallelCpgPass[ builder.addEdge(literalNode, propertyNode, EdgeTypes.ORIGINAL_PROPERTY) } } + +//private def namedAnnotatedParameters(): List[(MethodParameterIn, String)] = cpg.annotation +// .nameExact("Named") +// .filter(_.parameter.nonEmpty) +// .map { x => +// val value = x.parameterAssign.code.next().split("[.]").lastOption.getOrElse("") +// (x.parameter.next(), value) +// } +// .filter { (_, value) => +// value.nonEmpty +// } +// .toList diff --git a/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaPropertyLinkerPass.scala b/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaPropertyLinkerPass.scala new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/src/main/scala/ai/privado/languageEngine/java/passes/config/JavaPropertyLinkerPass.scala @@ -0,0 +1 @@ + diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/PrivadoTagger.scala index 2af17d8c7..f65257cc9 100644 
--- a/src/main/scala/ai/privado/languageEngine/java/tagger/PrivadoTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/java/tagger/PrivadoTagger.scala @@ -40,6 +40,7 @@ import ai.privado.languageEngine.java.tagger.collection.{ SOAPCollectionTagger } import ai.privado.languageEngine.java.tagger.config.JavaDBConfigTagger +import ai.privado.languageEngine.java.tagger.sink.api.JavaAPISinkTagger import ai.privado.languageEngine.java.tagger.sink.{InheritMethodTagger, JavaAPITagger, MessagingConsumerCustomTagger} import ai.privado.languageEngine.java.tagger.source.{IdentifierTagger, InSensitiveCallTagger} import ai.privado.tagger.PrivadoBaseTagger @@ -82,6 +83,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { new JavaS3Tagger(cpg, s3DatabaseDetailsCache).createAndApply() + JavaAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig) + new JavaAPITagger(cpg, ruleCache, privadoInputConfig).createAndApply() // Custom Rule tagging if (!privadoInputConfig.ignoreInternalRules) { diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/FeignAPI.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/FeignAPI.scala index 964f1b334..7fd979c83 100644 --- a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/FeignAPI.scala +++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/FeignAPI.scala @@ -201,7 +201,9 @@ class FeignAPI(cpg: Cpg, ruleCache: RuleCache) { apiCalls.foreach(apiNode => { val domain = getDomainFromString(apiLiteral) val newRuleIdToUse = ruleInfo.id + "." 
+ domain - ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain)) + ruleCache.setRuleInfo( + ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain, isGenerated = true) + ) addRuleTags(builder, apiNode, ruleInfo, ruleCache, Some(newRuleIdToUse)) storeForTag(builder, apiNode, ruleCache)(Constants.apiUrl + newRuleIdToUse, apiLiteral) }) diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/JavaAPITagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/JavaAPITagger.scala index 23195447c..e31530ee6 100644 --- a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/JavaAPITagger.scala +++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/JavaAPITagger.scala @@ -28,7 +28,7 @@ import ai.privado.languageEngine.java.language.* import ai.privado.languageEngine.java.semantic.JavaSemanticGenerator import ai.privado.languageEngine.java.tagger.Utility.{GRPCTaggerUtility, SOAPTaggerUtility} import ai.privado.metric.MetricHandler -import ai.privado.model.{Constants, Language, NodeType, RuleInfo} +import ai.privado.model.{Constants, InternalTag, Language, NodeType, RuleInfo} import ai.privado.tagger.PrivadoParallelCpgPass import ai.privado.tagger.utility.APITaggerUtility.{SERVICE_URL_REGEX_PATTERN, sinkTagger} import ai.privado.utility.{ImportUtility, Utilities} @@ -96,13 +96,17 @@ class JavaAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInputConfig: PrivadoI APITaggerVersionJava.V2Tagger } + apis = apis.whereNot(_.tag.nameExact(InternalTag.API_URL_MARKED.toString)).l + val commonHttpPackages: String = ruleCache.getSystemConfigByKey(Constants.apiHttpLibraries) - val grpcSinks = GRPCTaggerUtility.getGrpcSinks(cpg) - val soapSinks = SOAPTaggerUtility.getAPICallNodes(cpg) + val grpcSinks = GRPCTaggerUtility.getGrpcSinks(cpg).whereNot(_.tag.nameExact(InternalTag.API_URL_MARKED.toString)).l + val soapSinks = + 
SOAPTaggerUtility.getAPICallNodes(cpg).whereNot(_.tag.nameExact(InternalTag.API_URL_MARKED.toString)).l override def generateParts(): Array[_ <: AnyRef] = { ruleCache.getAllRuleInfo .filter(rule => rule.nodeType.equals(NodeType.API)) + .filterNot(_.isGenerated) // Filter out generated rules, we only need to use the passed rules .toArray } @@ -133,7 +137,12 @@ class JavaAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInputConfig: PrivadoI ) else List() - } + }.whereNot(_.tag.nameExact(InternalTag.API_URL_MARKED.toString)).l + + val markedAPISinks = cpg.call + .where(_.tag.nameExact(InternalTag.API_SINK_MARKED.toString)) + .whereNot(_.tag.nameExact(InternalTag.API_URL_MARKED.toString)) + .l apiTaggerToUse match { case APITaggerVersionJava.V1Tagger => @@ -150,7 +159,7 @@ class JavaAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInputConfig: PrivadoI ) sinkTagger( apiInternalSources ++ propertySources ++ identifierSource ++ serviceSource, - feignAPISinks ++ grpcSinks ++ soapSinks, + feignAPISinks ++ grpcSinks ++ soapSinks ++ markedAPISinks, builder, ruleInfo, ruleCache, @@ -161,7 +170,7 @@ class JavaAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInputConfig: PrivadoI println(s"${Calendar.getInstance().getTime} - --API TAGGER V2 invoked...") sinkTagger( apiInternalSources ++ propertySources ++ identifierSource ++ serviceSource, - apis.methodFullName(commonHttpPackages).l ++ feignAPISinks ++ grpcSinks ++ soapSinks, + apis.methodFullName(commonHttpPackages).l ++ feignAPISinks ++ grpcSinks ++ soapSinks ++ markedAPISinks, builder, ruleInfo, ruleCache, @@ -172,7 +181,7 @@ class JavaAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInputConfig: PrivadoI println(s"${Calendar.getInstance().getTime} - --API TAGGER SKIPPED, applying Feign client API...") sinkTagger( apiInternalSources ++ propertySources ++ identifierSource ++ serviceSource, - feignAPISinks ++ grpcSinks ++ soapSinks, + feignAPISinks ++ grpcSinks ++ soapSinks ++ markedAPISinks, builder, ruleInfo, ruleCache, diff 
--git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByMethodFullNameTagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByMethodFullNameTagger.scala new file mode 100644 index 000000000..d89335984 --- /dev/null +++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByMethodFullNameTagger.scala @@ -0,0 +1,28 @@ +package ai.privado.languageEngine.java.tagger.sink.api + +import ai.privado.cache.RuleCache +import ai.privado.model.{Constants, InternalTag} +import ai.privado.tagger.{PrivadoParallelCpgPass, PrivadoSimpleCpgPass} +import io.shiftleft.codepropertygraph.generated.{Cpg, Operators} +import io.shiftleft.codepropertygraph.generated.nodes.Call +import io.shiftleft.semanticcpg.language.* +import ai.privado.utility.Utilities.{addRuleTags, storeForTag} + +import scala.jdk.CollectionConverters.CollectionHasAsScala +import scala.language.postfixOps + +class JavaAPISinkByMethodFullNameTagger(cpg: Cpg, ruleCache: RuleCache) extends PrivadoSimpleCpgPass(cpg) { + + private val apiMethodFullNameRegex = ruleCache.getSystemConfigByKey(Constants.apiMethodFullNames) + + val cacheCall: List[Call] = cpg.call.or(_.nameNot(Operators.ALL.asScala.toSeq.appended("init"): _*)).l + override def run(builder: DiffGraphBuilder): Unit = { + if (apiMethodFullNameRegex.nonEmpty) { + val sinkCalls = cacheCall.methodFullName(apiMethodFullNameRegex).toArray + + // Mark the nodes as API sink + sinkCalls.foreach(storeForTag(builder, _, ruleCache)(InternalTag.API_SINK_MARKED.toString)) + } + } + +} diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByParameterTagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByParameterTagger.scala new file mode 100644 index 000000000..340597ea0 --- /dev/null +++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByParameterTagger.scala @@ -0,0 +1,64 @@ +package 
ai.privado.languageEngine.java.tagger.sink.api + +import ai.privado.cache.RuleCache +import ai.privado.model.{InternalTag, RuleInfo} +import ai.privado.tagger.PrivadoParallelCpgPass +import io.shiftleft.codepropertygraph.generated.{Cpg, Operators} +import io.shiftleft.semanticcpg.language.* + +import scala.jdk.CollectionConverters.CollectionHasAsScala +import ai.privado.utility.Utilities.storeForTag + +class JavaAPISinkByParameterTagger(cpg: Cpg, ruleCache: RuleCache) extends PrivadoParallelCpgPass[String](cpg) { + override def generateParts(): Array[String] = { + + /* Below query looks for methods whose parameter names end with `url|endpoint`; + for each such method, get the typeFullName of the object returned by this method + */ + + val typeFullNameByUrlLikeMatch = cpg.method + .where(_.parameter.filter(_.index != 0).name("(?i).*(url|endpoint)")) + .signature + .map(_.split("\\(").headOption.getOrElse("")) + .filter(_.nonEmpty) + .l + + /* Below query looks for methods whose parameter names end with `config` and which are part of a constructor; + for each such method, get the typeFullName of the object for which this constructor is added + */ + val typeFullNameByConfigLikeMatch = cpg.method + .where(_.parameter.filter(_.index != 0).name("(?i).*(config)")) + .where(_.name("")) + .fullName + .map(_.split("[.] 
+ val memberName = m.name + val fileName = m.file.name.headOption.getOrElse("") + + /* Below query looks for fieldIdentifier in the given file and return calls which are made on top of this fieldIdentifier + The 1st callIn returns the field Access node and the 2nd returns the actual call + */ + + val sinkCalls = cpg.fieldAccess.fieldIdentifier + .canonicalName(memberName) + .where(_.file.nameExact(fileName)) + .l + .inCall + .inCall + .where(_.nameNot(Operators.ALL.asScala.toSeq.appended(""): _*)) + .l + + // Mark the nodes as API sink + sinkCalls.foreach(storeForTag(builder, _, ruleCache)(InternalTag.API_SINK_MARKED.toString)) + + } + } +} diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkEndpointMapperByNonInitMethod.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkEndpointMapperByNonInitMethod.scala new file mode 100644 index 000000000..23799bcc7 --- /dev/null +++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkEndpointMapperByNonInitMethod.scala @@ -0,0 +1,109 @@ +package ai.privado.languageEngine.java.tagger.sink.api + +import ai.privado.cache.RuleCache +import ai.privado.model.{Constants, InternalTag, NodeType, RuleInfo} +import ai.privado.tagger.PrivadoParallelCpgPass +import io.shiftleft.codepropertygraph.generated.Cpg +import io.shiftleft.semanticcpg.language.* +import ai.privado.languageEngine.java.language.* +import ai.privado.tagger.utility.APITaggerUtility.{ + getLiteralCode, + resolveDomainFromSource, + tagAPIWithDomainAndUpdateRuleCache +} +import ai.privado.utility.Utilities.{addRuleTags, getDomainFromString, storeForTag} +import io.shiftleft.codepropertygraph.generated.nodes.AstNode + +class JavaAPISinkEndpointMapperByNonInitMethod(cpg: Cpg, ruleCache: RuleCache) + extends PrivadoParallelCpgPass[String](cpg) { + + private val methodFullNameSplitter = "[:(]" + + private val apiMatchingRegex = + ruleCache.getAllRuleInfo.filter(_.nodeType == 
NodeType.API).map(_.combinedRulePattern).mkString("(", "|", ")") + + private val thirdPartyRuleInfo = ruleCache.getRuleInfo(Constants.thirdPartiesAPIRuleId) + override def generateParts(): Array[String] = { + + /* General assumption - there is a function which creates a client, and the usage of the client and binding + happens via dependency injection which can vary according to the framework used. + If we identify such a function and can point to the usage of a particular endpoint in it, + we can say the client uses the following endpoint + */ + + if (thirdPartyRuleInfo.isDefined) { + cpg.call + .where(_.tag.nameExact(InternalTag.API_SINK_MARKED.toString)) + .methodFullName + .map(_.split(methodFullNameSplitter).headOption.getOrElse("")) + .filter(_.nonEmpty) + .map { methodNamespace => + val parts = methodNamespace.split("[.]") + if parts.nonEmpty then parts.dropRight(1).mkString(".") else "" + } + .dedup + .toArray + } else + Array[String]() + } + + override def runOnPart(builder: DiffGraphBuilder, typeFullName: String): Unit = { + + cpg.method.signature(s"$typeFullName$methodFullNameSplitter.*").foreach { clientReturningMethod => + val matchingProperties = clientReturningMethod.ast.originalProperty.value(apiMatchingRegex).dedup.l + + val impactedApiCalls = cpg.call + .methodFullName(s"$typeFullName.*") + .where(_.tag.nameExact(InternalTag.API_SINK_MARKED.toString)) + .l + + if (matchingProperties.nonEmpty) { + matchingProperties.foreach { propertyNode => + val domain = getDomainFromString(propertyNode.value) + impactedApiCalls.foreach { apiCall => + tagAPIWithDomainAndUpdateRuleCache( + builder, + thirdPartyRuleInfo.get, + ruleCache, + domain, + apiCall, + propertyNode + ) + storeForTag(builder, apiCall, ruleCache)(InternalTag.API_URL_MARKED.toString) + } + } + } else { // There is no property node available to be used, try with parameter + val variableRegex = ruleCache.getSystemConfigByKey(Constants.apiIdentifier) + val matchingParameters = 
clientReturningMethod.parameter.name(variableRegex).l + + if (matchingParameters.nonEmpty) { + matchingParameters.foreach { parameter => + val domain = resolveDomainFromSource(parameter) + impactedApiCalls.foreach { apiCall => + tagAPIWithDomainAndUpdateRuleCache(builder, thirdPartyRuleInfo.get, ruleCache, domain, apiCall, parameter) + storeForTag(builder, apiCall, ruleCache)(InternalTag.API_URL_MARKED.toString) + } + } + } else { // There is no matching parameter to be used, try with identifier + val matchingIdentifiers = clientReturningMethod.ast.isIdentifier.name(variableRegex).l + if (matchingIdentifiers.nonEmpty) { + matchingIdentifiers.foreach { identifier => + val domain = resolveDomainFromSource(identifier) + impactedApiCalls.foreach { apiCall => + tagAPIWithDomainAndUpdateRuleCache( + builder, + thirdPartyRuleInfo.get, + ruleCache, + domain, + apiCall, + identifier + ) + storeForTag(builder, apiCall, ruleCache)(InternalTag.API_URL_MARKED.toString) + } + } + } + } + } + } + } +} diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkTagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkTagger.scala new file mode 100644 index 000000000..f7536de3f --- /dev/null +++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkTagger.scala @@ -0,0 +1,25 @@ +package ai.privado.languageEngine.java.tagger.sink.api + +import ai.privado.cache.RuleCache +import ai.privado.entrypoint.PrivadoInput +import ai.privado.tagger.sink.api.APISinkTagger +import io.shiftleft.codepropertygraph.generated.Cpg + +object JavaAPISinkTagger extends APISinkTagger { + + /** Wrapper method to tag all the api taggers + * @param cpg + * @param ruleCache + */ + override def applyTagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput): Unit = { + + if (privadoInput.enableAPIByParameter) + new JavaAPISinkByParameterTagger(cpg, ruleCache).createAndApply() + + new JavaAPISinkByMethodFullNameTagger(cpg, 
ruleCache).createAndApply() + + // Invoke API Endpoint mappers + new JavaAPISinkEndpointMapperByNonInitMethod(cpg, ruleCache).createAndApply() + } + +} diff --git a/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala b/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala index 4a68f5992..84d0f5f4d 100644 --- a/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala @@ -76,7 +76,9 @@ object JavascriptProcessor { new JsonPropertyParserPass(cpg, s"$sourceRepoLocation/${Constants.generatedConfigFolderName}") .createAndApply() new JsConfigPropertyPass(cpg).createAndApply() - } else new PropertyParserPass(cpg, sourceRepoLocation, ruleCache, Language.JAVASCRIPT).createAndApply() + } else + new PropertyParserPass(cpg, sourceRepoLocation, ruleCache, Language.JAVASCRIPT, privadoInput) + .createAndApply() new JSEnvPropertyLinkerPass(cpg).createAndApply() new SQLParser(cpg, sourceRepoLocation, ruleCache).createAndApply() new DBTParserPass(cpg, sourceRepoLocation, ruleCache).createAndApply() diff --git a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/GraphqlAPITagger.scala b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/GraphqlAPITagger.scala index 204d19057..ce9559c9e 100644 --- a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/GraphqlAPITagger.scala +++ b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/GraphqlAPITagger.scala @@ -61,7 +61,9 @@ class GraphqlAPITagger(cpg: Cpg, ruleCache: RuleCache) extends PrivadoParallelCp if (isReadAPI) { val newRuleIdToUse = ruleInfo.id + Constants.READ_WITH_BRACKETS - ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + Constants.READ_WITH_BRACKETS)) + ruleCache.setRuleInfo( + ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + 
Constants.READ_WITH_BRACKETS, isGenerated = true) + ) addRuleTags(builder, apiNode, ruleInfo, ruleCache, Some(newRuleIdToUse)) storeForTag(builder, apiNode, ruleCache)( Constants.apiUrl + newRuleIdToUse, @@ -70,7 +72,7 @@ class GraphqlAPITagger(cpg: Cpg, ruleCache: RuleCache) extends PrivadoParallelCp } else if (isWriteAPI) { val newRuleIdToUse = ruleInfo.id + Constants.WRITE_WITH_BRACKETS ruleCache.setRuleInfo( - ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + Constants.WRITE_WITH_BRACKETS) + ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + Constants.WRITE_WITH_BRACKETS, isGenerated = true) ) addRuleTags(builder, apiNode, ruleInfo, ruleCache, Some(newRuleIdToUse)) storeForTag(builder, apiNode, ruleCache)( diff --git a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPITagger.scala b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPITagger.scala index 99c50e4dc..b7bfab394 100644 --- a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPITagger.scala +++ b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPITagger.scala @@ -103,7 +103,9 @@ class JSAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput) if (ruleInfo.id.equals(Constants.internalAPIRuleId)) addRuleTags(builder, scriptTag, ruleInfo, ruleCache) else { newRuleIdToUse = ruleInfo.id + "." 
+ domain._2 - ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain._2)) + ruleCache.setRuleInfo( + ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain._2, isGenerated = true) + ) addRuleTags(builder, scriptTag, ruleInfo, ruleCache, Some(newRuleIdToUse)) } storeForTag(builder, scriptTag, ruleCache)(Constants.apiUrl + newRuleIdToUse, domain._1) @@ -113,7 +115,9 @@ class JSAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput) if (!identifierDomain.equals(Constants.UnknownDomain)) { if (!ruleInfo.id.equals(Constants.internalAPIRuleId)) { newRuleIdToUse = ruleInfo.id + "." + identifierDomain - ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + identifierDomain)) + ruleCache.setRuleInfo( + ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + identifierDomain, isGenerated = true) + ) addRuleTags(builder, scriptTag, ruleInfo, ruleCache, Some(newRuleIdToUse)) storeForTag(builder, scriptTag, ruleCache)(Constants.apiUrl + newRuleIdToUse, identifierDomain) } @@ -131,7 +135,9 @@ class JSAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput) if (ruleInfo.id.equals(Constants.internalAPIRuleId)) addRuleTags(builder, externalScriptCall, ruleInfo, ruleCache) else { newRuleIdToUse = ruleInfo.id + "." 
+ domain._2 - ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain._2)) + ruleCache.setRuleInfo( + ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain._2, isGenerated = true) + ) addRuleTags(builder, externalScriptCall, ruleInfo, ruleCache, Some(newRuleIdToUse)) } storeForTag(builder, externalScriptCall, ruleCache)(Constants.apiUrl + newRuleIdToUse, domain._1) @@ -166,7 +172,9 @@ class JSAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput) if (ruleInfo.id.equals(Constants.internalAPIRuleId)) addRuleTags(builder, callTag, ruleInfo, ruleCache) else { newRuleIdToUse = ruleInfo.id + "." + domain._2 - ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain._2)) + ruleCache.setRuleInfo( + ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain._2, isGenerated = true) + ) addRuleTags(builder, callTag, ruleInfo, ruleCache, Some(newRuleIdToUse)) } storeForTag(builder, callTag, ruleCache)(Constants.apiUrl + newRuleIdToUse, domain._1) @@ -190,7 +198,9 @@ class JSAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput) // Tagging the node with respective domain val newRuleIdToUse = ruleInfo.id + "." 
+ domain - ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain)) + ruleCache.setRuleInfo( + ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain, isGenerated = true) + ) addRuleTags(builder, apiNode, ruleInfo, ruleCache, Some(newRuleIdToUse)) storeForTag(builder, apiNode, ruleCache)(Constants.apiUrl + newRuleIdToUse, domain) diff --git a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/RegularSinkTagger.scala b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/RegularSinkTagger.scala index c7b0427a0..41a6cc1bf 100644 --- a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/RegularSinkTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/RegularSinkTagger.scala @@ -88,7 +88,9 @@ class RegularSinkTagger(cpg: Cpg, ruleCache: RuleCache) extends PrivadoParallelC case node => node.code }).stripPrefix("\"").stripSuffix("\"") val newRuleIdToUse = ruleInfo.id + "." + cookieName - ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + cookieName)) + ruleCache.setRuleInfo( + ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + cookieName, isGenerated = true) + ) addRuleTags(builder, sink, ruleInfo, ruleCache, Some(newRuleIdToUse)) DatabaseDetailsCache.addDatabaseDetails( DatabaseDetails( diff --git a/src/main/scala/ai/privado/languageEngine/php/processor/PhpProcessor.scala b/src/main/scala/ai/privado/languageEngine/php/processor/PhpProcessor.scala new file mode 100644 index 000000000..13fad4223 --- /dev/null +++ b/src/main/scala/ai/privado/languageEngine/php/processor/PhpProcessor.scala @@ -0,0 +1,115 @@ +/* + * This file is part of Privado OSS. + * + * Privado is an open source static code analysis tool to discover data flows in the code. + * Copyright (C) 2022 Privado, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * For more information, contact support@privado.ai + * + */ + +package ai.privado.languageEngine.php.processor + +import ai.privado.cache.* +import ai.privado.entrypoint.{PrivadoInput, TimeMetric} +import ai.privado.languageEngine.base.processor.BaseProcessor +import ai.privado.languageEngine.php.semantic.Language.tagger +import ai.privado.model.Constants.* +import ai.privado.model.Language.Language +import ai.privado.utility.Utilities.createCpgFolder +import io.joern.php2cpg.{Config, Php2Cpg} +import io.joern.x2cpg.X2Cpg.applyDefaultOverlays +import io.shiftleft.codepropertygraph.generated.Cpg +import io.shiftleft.passes.CpgPassBase +import org.slf4j.{Logger, LoggerFactory} + +import java.io.File +import java.nio.file.Paths +import java.util.Calendar + +class PhpProcessor( + ruleCache: RuleCache, + privadoInput: PrivadoInput, + sourceRepoLocation: String, + lang: Language, + dataFlowCache: DataFlowCache, + auditCache: AuditCache, + s3DatabaseDetailsCache: S3DatabaseDetailsCache +) extends BaseProcessor( + ruleCache, + privadoInput, + sourceRepoLocation, + lang, + dataFlowCache, + auditCache, + s3DatabaseDetailsCache + ) { + + override val logger: Logger = LoggerFactory.getLogger(this.getClass) + + override def applyPrivadoPasses(cpg: Cpg): List[CpgPassBase] = List[CpgPassBase]() + + override def runPrivadoTagger(cpg: 
Cpg, taggerCache: TaggerCache): Unit = + cpg.runTagger(ruleCache, taggerCache, privadoInput, dataFlowCache) + + override def applyDataflowAndPostProcessingPasses(cpg: Cpg): Unit = { + super.applyDataflowAndPostProcessingPasses(cpg) + Php2Cpg.postProcessingPasses(cpg).foreach(_.createAndApply()) + } + + override def processCpg(): Either[String, Unit] = { + println(s"${Calendar.getInstance().getTime} - Processing source code using $lang engine") + + createCpgFolder(sourceRepoLocation) + + val cpgOutput = Paths.get(sourceRepoLocation, outputDirectoryName, cpgOutputFileName) + val cpgConfig = Config() + .withInputPath(sourceRepoLocation) + .withOutputPath(cpgOutput.toString) + .withIgnoredFilesRegex(ruleCache.getExclusionRegex) + .withPhpParserBin(PhpProcessor.parserBinPath) + + val xtocpg = new Php2Cpg().createCpg(cpgConfig).map { cpg => + println( + s"${TimeMetric.getNewTime()} - Base processing done in \t\t\t\t- ${TimeMetric.setNewTimeToLastAndGetTimeDiff()}" + ) + + applyDefaultOverlays(cpg) + cpg + } + + tagAndExport(xtocpg) + } +} + +object PhpProcessor { + val parserBinPath: String = { + val dir = getClass.getProtectionDomain.getCodeSource.getLocation.toString + val indexOfLib = dir.lastIndexOf("lib") + val fixedDir = if (indexOfLib != -1) { + new File(dir.substring("file:".length, indexOfLib)).toString + } else { + val indexOfTarget = dir.lastIndexOf("target") + if (indexOfTarget != -1) { + new File(dir.substring("file:".length, indexOfTarget)).toString + } else { + "." + } + } + + Paths.get(fixedDir, "/bin/php-parser/php-parser.php").toAbsolutePath.toString + } +} diff --git a/src/main/scala/ai/privado/languageEngine/php/semantic/Language.scala b/src/main/scala/ai/privado/languageEngine/php/semantic/Language.scala new file mode 100644 index 000000000..283e89171 --- /dev/null +++ b/src/main/scala/ai/privado/languageEngine/php/semantic/Language.scala @@ -0,0 +1,31 @@ +/* + * This file is part of Privado OSS. 
+ * + * Privado is an open source static code analysis tool to discover data flows in the code. + * Copyright (C) 2022 Privado, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * For more information, contact support@privado.ai + * + */ + +package ai.privado.languageEngine.php.semantic + +import ai.privado.languageEngine.php.tagger.PrivadoTagger +import io.shiftleft.codepropertygraph.generated.Cpg + +object Language { + implicit def tagger(cpg: Cpg): PrivadoTagger = new PrivadoTagger(cpg) +} diff --git a/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala new file mode 100644 index 000000000..2ab1e0507 --- /dev/null +++ b/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala @@ -0,0 +1,56 @@ +/* + * This file is part of Privado OSS. + * + * Privado is an open source static code analysis tool to discover data flows in the code. + * Copyright (C) 2022 Privado, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * For more information, contact support@privado.ai + * + */ + +package ai.privado.languageEngine.php.tagger + +import ai.privado.cache.{DataFlowCache, RuleCache, TaggerCache} +import ai.privado.entrypoint.PrivadoInput +import ai.privado.languageEngine.php.tagger.source.IdentifierTagger +import ai.privado.tagger.PrivadoBaseTagger +import ai.privado.tagger.sink.RegularSinkTagger +import ai.privado.tagger.source.LiteralTagger +import io.shiftleft.codepropertygraph.generated.Cpg +import io.shiftleft.codepropertygraph.generated.nodes.Tag +import io.shiftleft.semanticcpg.language.* +import org.slf4j.LoggerFactory +import overflowdb.traversal.Traversal + +class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { + private val logger = LoggerFactory.getLogger(this.getClass) + + override def runTagger( + rules: RuleCache, + taggerCache: TaggerCache, + privadoInputConfig: PrivadoInput, + dataFlowCache: DataFlowCache + ): Traversal[Tag] = { + logger.info("Beginning tagging") + + new LiteralTagger(cpg, rules).createAndApply() + new IdentifierTagger(cpg, rules, taggerCache).createAndApply() + new RegularSinkTagger(cpg, rules).createAndApply() + + logger.info("Finished tagging") + cpg.tag + } +} diff --git a/src/main/scala/ai/privado/languageEngine/php/tagger/source/IdentifierTagger.scala b/src/main/scala/ai/privado/languageEngine/php/tagger/source/IdentifierTagger.scala new file mode 100644 index 000000000..e8cd8e99c --- /dev/null +++ b/src/main/scala/ai/privado/languageEngine/php/tagger/source/IdentifierTagger.scala @@ -0,0 +1,200 @@ +/* + * This file is part of Privado OSS. 
+ * + * Privado is an open source static code analysis tool to discover data flows in the code. + * Copyright (C) 2022 Privado, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * For more information, contact support@privado.ai + * + */ + +package ai.privado.languageEngine.php.tagger.source + +import ai.privado.cache.{RuleCache, TaggerCache} +import ai.privado.languageEngine.java.tagger.source.Utility.{ + getCallsMatchingReturnRegex, + getFieldAccessCallsMatchingRegex +} +import ai.privado.model.{CatLevelOne, Constants, InternalTag, RuleInfo} +import ai.privado.tagger.PrivadoParallelCpgPass +import ai.privado.utility.Utilities.{addRuleTags, storeForTag} +import io.shiftleft.codepropertygraph.generated.Cpg +import io.shiftleft.semanticcpg.language.* +import overflowdb.BatchedUpdate + +import java.util.UUID + +class IdentifierTagger(cpg: Cpg, ruleCache: RuleCache, taggerCache: TaggerCache) + extends PrivadoParallelCpgPass[RuleInfo](cpg) { + lazy val RANDOM_ID_OBJECT_OF_TYPE_DECL_HAVING_MEMBER_NAME: String = UUID.randomUUID.toString + lazy val RANDOM_ID_OBJECT_OF_TYPE_DECL_HAVING_MEMBER_TYPE: String = UUID.randomUUID.toString + lazy val RANDOM_ID_OBJECT_OF_TYPE_DECL_EXTENDING_TYPE: String = UUID.randomUUID.toString + + override def generateParts(): Array[RuleInfo] = ruleCache.getRule.sources.toArray + + override def runOnPart(builder: DiffGraphBuilder, ruleInfo: 
RuleInfo): Unit = { + val rulePattern = ruleInfo.combinedRulePattern + val regexMatchingIdentifiers = cpg.identifier(rulePattern).l + regexMatchingIdentifiers.foreach(identifier => { + storeForTag(builder, identifier, ruleCache)(InternalTag.VARIABLE_REGEX_IDENTIFIER.toString) + addRuleTags(builder, identifier, ruleInfo, ruleCache) + }) + + val regexMatchingFieldIdentifiersIdentifiers = + cpg.fieldAccess.where(_.fieldIdentifier.canonicalName(rulePattern)).isCall.l + regexMatchingFieldIdentifiersIdentifiers.foreach(identifier => { + storeForTag(builder, identifier, ruleCache)(InternalTag.VARIABLE_REGEX_IDENTIFIER.toString) + addRuleTags(builder, identifier, ruleInfo, ruleCache) + }) + + val regexMatchingMembers = cpg.member.name(rulePattern).l + regexMatchingMembers.foreach(member => { + storeForTag(builder, member, ruleCache)(InternalTag.VARIABLE_REGEX_MEMBER.toString) + addRuleTags(builder, member, ruleInfo, ruleCache) + }) + + tagObjectOfTypeDeclHavingMemberName(builder, rulePattern, ruleInfo) + } + + /** Tag identifier of all the typeDeclaration who have a member as memberName in argument Represent Step 2.1 + */ + private def tagObjectOfTypeDeclHavingMemberName( + builder: BatchedUpdate.DiffGraphBuilder, + memberNameRegex: String, + ruleInfo: RuleInfo + ): Unit = { + val typeDeclWithMemberNameHavingMemberName = cpg.typeDecl + .where(_.member.name(memberNameRegex).filterNot(item => item.name.equals(item.name.toUpperCase))) + .map(typeDeclNode => (typeDeclNode, typeDeclNode.member.name(memberNameRegex).l)) + .l + typeDeclWithMemberNameHavingMemberName + .distinctBy(_._1.fullName) + .foreach(typeDeclValEntry => { + typeDeclValEntry._2.foreach(typeDeclMember => { + val typeDeclFullName = typeDeclValEntry._1.fullName + // updating cache + taggerCache.addItemToTypeDeclMemberCache(typeDeclFullName, ruleInfo.id, typeDeclMember) + val typeDeclMemberName = typeDeclMember.name + // Have started tagging Parameters as well, as in collection points sometimes there is no 
referencing Identifier present for a local + val impactedObjects = + cpg.identifier + .where(_.typeFullName(typeDeclFullName)) + .l ::: cpg.parameter.where(_.typeFullName(typeDeclFullName)).l + + impactedObjects + .foreach(impactedObject => { + if (impactedObject.tag.nameExact(Constants.id).l.isEmpty) { + storeForTag(builder, impactedObject, ruleCache)( + InternalTag.OBJECT_OF_SENSITIVE_CLASS_BY_MEMBER_NAME.toString, + ruleInfo.id + ) + storeForTag(builder, impactedObject, ruleCache)( + Constants.id, + Constants.privadoDerived + Constants.underScore + RANDOM_ID_OBJECT_OF_TYPE_DECL_HAVING_MEMBER_NAME + ) + storeForTag(builder, impactedObject, ruleCache)(Constants.catLevelOne, CatLevelOne.DERIVED_SOURCES.name) + } + storeForTag(builder, impactedObject, ruleCache)( + Constants.privadoDerived + Constants.underScore + RANDOM_ID_OBJECT_OF_TYPE_DECL_HAVING_MEMBER_NAME, + ruleInfo.id + ) + // Tag for storing memberName in derived Objects -> user --> (email, password) + storeForTag(builder, impactedObject, ruleCache)( + ruleInfo.id + Constants.underScore + Constants.privadoDerived + Constants.underScore + RANDOM_ID_OBJECT_OF_TYPE_DECL_HAVING_MEMBER_NAME, + typeDeclMemberName + ) + }) + + // To Mark all field Access and getters + tagAllFieldAccessAndGetters(builder, typeDeclFullName, ruleInfo, typeDeclMemberName) + }) + }) + + typeDeclWithMemberNameHavingMemberName + .distinctBy(_._1.fullName) + .foreach(typeDeclValEntry => { + val typeDeclName = typeDeclValEntry._1.fullName + // Step 2.2 + tagObjectOfTypeDeclHavingMemberType(builder, typeDeclName, ruleInfo) + }) + } + + /** Tag identifier of all the typeDeclaration who have a member of type -> memberType in argument Represent Step 2.2 + */ + private def tagObjectOfTypeDeclHavingMemberType( + builder: BatchedUpdate.DiffGraphBuilder, + memberType: String, + ruleInfo: RuleInfo + ): Unit = { + // stores tuple(Member, TypeDeclFullName) + val typeDeclHavingMemberTypeTuple = + 
cpg.typeDecl.member.typeFullName(memberType).map(member => (member, member.typeDecl.fullName)).dedup.l + typeDeclHavingMemberTypeTuple.foreach(typeDeclTuple => { + val typeDeclVal = typeDeclTuple._2 + val typeDeclMember = typeDeclTuple._1 + taggerCache.addItemToTypeDeclMemberCache(typeDeclVal, ruleInfo.id, typeDeclMember) + val impactedObjects = + cpg.identifier.where(_.typeFullName(typeDeclVal)).l ::: cpg.parameter + .where(_.typeFullName(typeDeclVal)) + .l + impactedObjects.foreach(impactedObject => { + if (impactedObject.tag.nameExact(Constants.id).l.isEmpty) { + storeForTag(builder, impactedObject, ruleCache)(InternalTag.OBJECT_OF_SENSITIVE_CLASS_BY_MEMBER_TYPE.toString) + storeForTag(builder, impactedObject, ruleCache)( + Constants.id, + Constants.privadoDerived + Constants.underScore + RANDOM_ID_OBJECT_OF_TYPE_DECL_HAVING_MEMBER_TYPE + ) + storeForTag(builder, impactedObject, ruleCache)(Constants.catLevelOne, CatLevelOne.DERIVED_SOURCES.name) + } + storeForTag(builder, impactedObject, ruleCache)( + Constants.privadoDerived + Constants.underScore + RANDOM_ID_OBJECT_OF_TYPE_DECL_HAVING_MEMBER_TYPE, + ruleInfo.id + ) + }) + + // To Mark all field Access and getters + tagAllFieldAccessAndGetters(builder, typeDeclVal, ruleInfo, typeDeclMember.name) + }) + } + + /** Function to tag all the field access operations and all the methods whose return code matches the member regex + * + * @param builder + * @param typeDeclVal + * @param memberNameRegex + * @param ruleInfo + * @param typeDeclMemberName + */ + private def tagAllFieldAccessAndGetters( + builder: BatchedUpdate.DiffGraphBuilder, + typeDeclVal: String, + ruleInfo: RuleInfo, + typeDeclMemberName: String + ): Unit = { + val impactedGetters = getFieldAccessCallsMatchingRegex(cpg, typeDeclVal, s"($typeDeclMemberName)") + .filterNot(item => item.code.equals(item.code.toUpperCase)) + + impactedGetters.foreach(impactedGetter => { + storeForTag(builder, impactedGetter, 
ruleCache)(InternalTag.SENSITIVE_FIELD_ACCESS.toString) + addRuleTags(builder, impactedGetter, ruleInfo, ruleCache) + }) + + val impactedReturnMethods = getCallsMatchingReturnRegex(cpg, typeDeclVal, s"($typeDeclMemberName)") + impactedReturnMethods + .foreach(storeForTag(builder, _, ruleCache)(InternalTag.SENSITIVE_METHOD_RETURN.toString, ruleInfo.id)) + + } +} diff --git a/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPITagger.scala b/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPITagger.scala index 2596cdcf4..71e681608 100644 --- a/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPITagger.scala +++ b/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPITagger.scala @@ -56,6 +56,7 @@ class PythonAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput override def generateParts(): Array[_ <: AnyRef] = { ruleCache.getRule.sinks .filter(rule => rule.nodeType.equals(NodeType.API)) + .filterNot(_.isGenerated) // Filter out generated rules, we only need to use the passed rules .toArray } diff --git a/src/main/scala/ai/privado/languageEngine/ruby/tagger/sink/APITagger.scala b/src/main/scala/ai/privado/languageEngine/ruby/tagger/sink/APITagger.scala index 28d8d971d..1e2af2eb9 100644 --- a/src/main/scala/ai/privado/languageEngine/ruby/tagger/sink/APITagger.scala +++ b/src/main/scala/ai/privado/languageEngine/ruby/tagger/sink/APITagger.scala @@ -47,6 +47,7 @@ class APITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput) override def generateParts(): Array[_ <: AnyRef] = { ruleCache.getRule.sinks .filter(rule => rule.nodeType.equals(NodeType.API)) + .filterNot(_.isGenerated) // Filter out generated rules, we only need to use the passed rules .toArray } diff --git a/src/main/scala/ai/privado/model/Config.scala b/src/main/scala/ai/privado/model/Config.scala index fa419e23b..5d4a00074 100644 --- a/src/main/scala/ai/privado/model/Config.scala +++ 
b/src/main/scala/ai/privado/model/Config.scala @@ -42,7 +42,8 @@ case class RuleInfo( catLevelOne: CatLevelOne.CatLevelOne, catLevelTwo: String, language: Language.Language, - categoryTree: Array[String] + categoryTree: Array[String], + isGenerated: Boolean = false // mark this true, if the rule is generated by privado-core ) { def combinedRulePattern: String = { patterns.mkString("(", "|", ")") diff --git a/src/main/scala/ai/privado/model/Constants.scala b/src/main/scala/ai/privado/model/Constants.scala index bc317e499..2e4acc5f5 100644 --- a/src/main/scala/ai/privado/model/Constants.scala +++ b/src/main/scala/ai/privado/model/Constants.scala @@ -54,6 +54,7 @@ object Constants { val value = "value" val MAX_SOCKET_COUNT = "maxSocketCount" val RepoPropertyConfig = "RepoPropertyConfig" + val filePath = "filePath" val third_parties = "third_parties" val internal_apis = "internal_apis" @@ -157,6 +158,7 @@ object Constants { val cookieSourceRuleId = "Data.Sensitive.OnlineIdentifiers.Cookies" val ignoredSinks = "ignoredSinks" val apiSinks = "apiSinks" + val apiMethodFullNames = "apiMethodFullNames" val apiHttpLibraries = "apiHttpLibraries" val apiIdentifier = "apiIdentifier" val apiGraphqlLibraries = "apiGraphqlLibraries" diff --git a/src/main/scala/ai/privado/model/PrivadoTag.scala b/src/main/scala/ai/privado/model/PrivadoTag.scala index b3d73415f..2beb744c2 100644 --- a/src/main/scala/ai/privado/model/PrivadoTag.scala +++ b/src/main/scala/ai/privado/model/PrivadoTag.scala @@ -45,6 +45,10 @@ object InternalTag extends Enumeration { val PROBABLE_ASSET = Value("PROBABLE_ASSET") val SOURCE_PROPERTY = Value("SOURCE_PROPERTY") + // API Tags + val API_SINK_MARKED = Value("API_SINK_MARKED") + val API_URL_MARKED = Value("API_URL_MARKED") + lazy val valuesAsString = InternalTag.values.map(value => value.toString()) } @@ -99,6 +103,7 @@ object Language extends Enumeration { val RUBY = Value("ruby") val KOTLIN = Value("kotlin") val GO = Value("go") + val PHP = Value("php") val 
CSHARP = Value("csharp") val DEFAULT = Value("default") val UNKNOWN = Value("unknown") diff --git a/src/main/scala/ai/privado/passes/DBTParserPass.scala b/src/main/scala/ai/privado/passes/DBTParserPass.scala index fd0ccc625..de57cba7a 100644 --- a/src/main/scala/ai/privado/passes/DBTParserPass.scala +++ b/src/main/scala/ai/privado/passes/DBTParserPass.scala @@ -171,7 +171,8 @@ class DBTParserPass(cpg: Cpg, projectRoot: String, ruleCache: RuleCache) extends CatLevelOne.SINKS, "storages", Language.DEFAULT, - Array[String]() + Array[String](), + true ) val dbDetails = DatabaseDetails(dbName, dbPlatform, dbHost, "", "", Some(schema)) diff --git a/src/main/scala/ai/privado/passes/PropertyParserPass.scala b/src/main/scala/ai/privado/passes/PropertyParserPass.scala index 2d9e66729..bc2081244 100644 --- a/src/main/scala/ai/privado/passes/PropertyParserPass.scala +++ b/src/main/scala/ai/privado/passes/PropertyParserPass.scala @@ -4,6 +4,7 @@ import io.shiftleft.codepropertygraph.generated.EdgeTypes import io.shiftleft.codepropertygraph.generated.nodes.NewJavaProperty import overflowdb.BatchedUpdate import ai.privado.cache.RuleCache +import ai.privado.entrypoint.PrivadoInput import io.joern.x2cpg.SourceFiles import io.shiftleft.codepropertygraph.generated.Cpg import io.shiftleft.codepropertygraph.generated.nodes.NewFile @@ -45,8 +46,13 @@ object FileExtensions { val CONF = ".conf" } -class PropertyParserPass(cpg: Cpg, projectRoot: String, ruleCache: RuleCache, language: Language.Value) - extends PrivadoParallelCpgPass[String](cpg) { +class PropertyParserPass( + cpg: Cpg, + projectRoot: String, + ruleCache: RuleCache, + language: Language.Value, + privadoInput: PrivadoInput = PrivadoInput() +) extends PrivadoParallelCpgPass[String](cpg) { val PLACEHOLDER_TOKEN_START_END = "@@" val logger = LoggerFactory.getLogger(getClass) @@ -65,10 +71,14 @@ class PropertyParserPass(cpg: Cpg, projectRoot: String, ruleCache: RuleCache, la ).toArray } case Language.JAVASCRIPT => - configFiles( 
- projectRoot, - Set(FileExtensions.JSON, FileExtensions.ENV, FileExtensions.YML, FileExtensions.YAML) - ).toArray + if (privadoInput.enableIngressAndEgressUrls) { + configFiles( + projectRoot, + Set(FileExtensions.JSON, FileExtensions.ENV, FileExtensions.YAML, FileExtensions.YML) + ).toArray + } else { + configFiles(projectRoot, Set(FileExtensions.JSON, FileExtensions.ENV)).toArray + } case Language.PYTHON => configFiles( projectRoot, diff --git a/src/main/scala/ai/privado/tagger/sink/APITagger.scala b/src/main/scala/ai/privado/tagger/sink/APITagger.scala index 6655263b3..2a85f85ff 100644 --- a/src/main/scala/ai/privado/tagger/sink/APITagger.scala +++ b/src/main/scala/ai/privado/tagger/sink/APITagger.scala @@ -58,6 +58,7 @@ class APITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput) override def generateParts(): Array[_ <: AnyRef] = { ruleCache.getRule.sinks .filter(rule => rule.nodeType.equals(NodeType.API)) + .filterNot(_.isGenerated) // Filter out generated rules, we only need to use the passed rules .toArray } override def runOnPart(builder: DiffGraphBuilder, ruleInfo: RuleInfo): Unit = { diff --git a/src/main/scala/ai/privado/tagger/sink/api/APISinkTagger.scala b/src/main/scala/ai/privado/tagger/sink/api/APISinkTagger.scala new file mode 100644 index 000000000..f99f41950 --- /dev/null +++ b/src/main/scala/ai/privado/tagger/sink/api/APISinkTagger.scala @@ -0,0 +1,11 @@ +package ai.privado.tagger.sink.api + +import ai.privado.cache.RuleCache +import ai.privado.entrypoint.PrivadoInput +import io.shiftleft.codepropertygraph.generated.Cpg + +trait APISinkTagger { + + def applyTagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput): Unit = ??? 
+ +} diff --git a/src/main/scala/ai/privado/tagger/utility/APITaggerUtility.scala b/src/main/scala/ai/privado/tagger/utility/APITaggerUtility.scala index 877f85c3d..3019ddcf0 100644 --- a/src/main/scala/ai/privado/tagger/utility/APITaggerUtility.scala +++ b/src/main/scala/ai/privado/tagger/utility/APITaggerUtility.scala @@ -38,8 +38,9 @@ import ai.privado.utility.Utilities.{ } import io.joern.dataflowengineoss.language.* import io.joern.dataflowengineoss.queryengine.{EngineConfig, EngineContext} -import io.shiftleft.codepropertygraph.generated.nodes.{AstNode, CfgNode, Member} +import io.shiftleft.codepropertygraph.generated.nodes.{AstNode, CfgNode, JavaProperty, Member} import overflowdb.BatchedUpdate +import overflowdb.BatchedUpdate.DiffGraphBuilder object APITaggerUtility { @@ -48,8 +49,9 @@ object APITaggerUtility { def getLiteralCode(element: AstNode): String = { val literalCode = element match { - case member: Member => member.name - case _ => element.code.split(" ").last + case member: Member => member.name + case propertyNode: JavaProperty => propertyNode.value + case _ => element.code.split(" ").last } element.originalPropertyValue.getOrElse(literalCode) @@ -83,7 +85,9 @@ object APITaggerUtility { else { val domain = resolveDomainFromSource(sourceNode) newRuleIdToUse = ruleInfo.id + "." 
+ domain - ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain)) + ruleCache.setRuleInfo( + ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain, isGenerated = true) + ) addRuleTags(builder, apiNode, ruleInfo, ruleCache, Some(newRuleIdToUse)) } storeForTag(builder, apiNode, ruleCache)(Constants.apiUrl + newRuleIdToUse, getLiteralCode(sourceNode)) @@ -110,7 +114,7 @@ object APITaggerUtility { urlValue.stripPrefix("\"").stripSuffix("\"") } - private def resolveDomainFromSource(sourceNode: AstNode): String = { + def resolveDomainFromSource(sourceNode: AstNode): String = { val sourceDomain = sourceNode.originalPropertyValue.getOrElse(getLiteralCode(sourceNode)) if (sourceDomain.matches(SERVICE_URL_REGEX_PATTERN)) { sourceDomain.split("//").last @@ -118,4 +122,19 @@ object APITaggerUtility { getDomainFromString(sourceDomain) } } + + def tagAPIWithDomainAndUpdateRuleCache( + builder: DiffGraphBuilder, + ruleInfo: RuleInfo, + ruleCache: RuleCache, + domain: String, + apiNode: AstNode, + apiUrlNode: AstNode + ) = { + val newRuleIdToUse = ruleInfo.id + "." 
+ domain + ruleCache.setRuleInfo(ruleInfo.copy(id = newRuleIdToUse, name = ruleInfo.name + " " + domain, isGenerated = true)) + addRuleTags(builder, apiNode, ruleInfo, ruleCache, Some(newRuleIdToUse)) + storeForTag(builder, apiNode, ruleCache)(Constants.apiUrl + newRuleIdToUse, getLiteralCode(apiUrlNode)) + + } } diff --git a/src/test/scala/ai/privado/RuleInfoTestData.scala b/src/test/scala/ai/privado/RuleInfoTestData.scala deleted file mode 100644 index 605a9cbff..000000000 --- a/src/test/scala/ai/privado/RuleInfoTestData.scala +++ /dev/null @@ -1,131 +0,0 @@ -package ai.privado - -import ai.privado.model.{CatLevelOne, ConfigAndRules, FilterProperty, Language, NodeType, RuleInfo} - -object RuleInfoTestData { - - val sourceRule = List( - RuleInfo( - "Data.Sensitive.FirstName", - "FirstName", - "", - FilterProperty.METHOD_FULL_NAME, - Array(), - List("(?i).*firstName|first_name.*"), - false, - "", - Map(), - NodeType.REGULAR, - "", - CatLevelOne.SOURCES, - "", - Language.JAVA, - Array() - ), - RuleInfo( - "Data.Sensitive.AccountData.AccountPassword", - "AccountPassword", - "", - FilterProperty.METHOD_FULL_NAME, - Array(), - List("(?i).*password.*"), - false, - "", - Map(), - NodeType.REGULAR, - "", - CatLevelOne.SOURCES, - "", - Language.JAVA, - Array() - ), - RuleInfo( - "Data.Sensitive.PersonalIdentification.LastName", - "LastName", - "", - FilterProperty.METHOD_FULL_NAME, - Array(), - List("(?i).*lastName.*"), - false, - "", - Map(), - NodeType.REGULAR, - "", - CatLevelOne.SOURCES, - "", - Language.UNKNOWN, - Array() - ), - RuleInfo( - "Data.Sensitive.PersonalIdentification.DateofBirth", - "Date of Birth", - "", - FilterProperty.METHOD_FULL_NAME, - Array(), - List("(?i).*dob.*"), - false, - "", - Map(), - NodeType.REGULAR, - "", - CatLevelOne.SOURCES, - "", - Language.UNKNOWN, - Array() - ), - RuleInfo( - "Data.Sensitive.ContactData.EmailAddress", - "EmailAddress", - "", - FilterProperty.METHOD_FULL_NAME, - Array(), - List("(?i).*email.*"), - true, - "", - Map(), 
- NodeType.REGULAR, - "", - CatLevelOne.SOURCES, - "", - Language.UNKNOWN, - Array() - ), - RuleInfo( - "Data.Sensitive.ContactData.PhoneNumber", - "Phone", - "", - FilterProperty.METHOD_FULL_NAME, - Array(), - List("(?i).*phone.*"), - true, - "", - Map(), - NodeType.REGULAR, - "", - CatLevelOne.SOURCES, - "", - Language.UNKNOWN, - Array() - ), - RuleInfo( - "Data.Sensitive.FinancialData.Salary", - "Salary", - "", - FilterProperty.METHOD_FULL_NAME, - Array(), - List("(?i).*salary.*"), - true, - "", - Map(), - NodeType.REGULAR, - "", - CatLevelOne.SOURCES, - "", - Language.UNKNOWN, - Array() - ) - ) - - val rule: ConfigAndRules = - ConfigAndRules(sourceRule, List(), List(), List(), List(), List(), List(), List(), List(), List()) -} diff --git a/src/test/scala/ai/privado/exporter/RepoConfigMetadataExporterTest.scala b/src/test/scala/ai/privado/exporter/RepoConfigMetadataExporterTest.scala index f775c65ef..9cbfdc29d 100644 --- a/src/test/scala/ai/privado/exporter/RepoConfigMetadataExporterTest.scala +++ b/src/test/scala/ai/privado/exporter/RepoConfigMetadataExporterTest.scala @@ -22,12 +22,15 @@ class RepoConfigMetadataExporterTest extends RepoConfigMetadataExporterBase { "Test Repo config Metadata sample" should { "should return correct metadata" in { - val resultMap = RepoConfigMetaDataExporter.getMetaData(cpg, ruleCache).toMap - resultMap.keys.toList should contain("name") - resultMap("name") should equal("exampleService") + val resultSet = RepoConfigMetaDataExporter.getMetaData(cpg, ruleCache).toArray + resultSet.length shouldBe 2 + resultSet(0)("name") should equal("name") + resultSet(0)("value") should equal("exampleService") + resultSet(0)("filePath").contains("test.yaml") shouldBe true - resultMap.keys.toList should contain("config.prod.DB_HOST_NAME") - resultMap("config.prod.DB_HOST_NAME") should equal("example.com") + resultSet(1)("name") should equal("config.prod.DB_HOST_NAME") + resultSet(1)("value") should equal("example.com") + 
resultSet(1)("filePath").contains("test.yaml") shouldBe true } } } diff --git a/src/test/scala/ai/privado/languageEngine/csharp/CSharpTestBase.scala b/src/test/scala/ai/privado/languageEngine/csharp/CSharpTestBase.scala index 6df07214d..26ca64891 100644 --- a/src/test/scala/ai/privado/languageEngine/csharp/CSharpTestBase.scala +++ b/src/test/scala/ai/privado/languageEngine/csharp/CSharpTestBase.scala @@ -1,6 +1,5 @@ package ai.privado.languageEngine.csharp -import ai.privado.RuleInfoTestData import ai.privado.cache.{AuditCache, DataFlowCache, RuleCache, TaggerCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.model.* @@ -20,6 +19,7 @@ import io.shiftleft.semanticcpg.layers.* import io.joern.dataflowengineoss.layers.dataflows.* import ai.privado.languageEngine.csharp.tagger.source.IdentifierTagger import ai.privado.model.SourceCodeModel +import ai.privado.rule.RuleInfoTestData import ai.privado.tagger.source.LiteralTagger abstract class CSharpTestBase extends AnyWordSpec with Matchers with BeforeAndAfterAll with BeforeAndAfterEach { diff --git a/src/test/scala/ai/privado/languageEngine/java/JavaTestBase.scala b/src/test/scala/ai/privado/languageEngine/java/JavaTestBase.scala new file mode 100644 index 000000000..73b52f34a --- /dev/null +++ b/src/test/scala/ai/privado/languageEngine/java/JavaTestBase.scala @@ -0,0 +1,36 @@ +package ai.privado.languageEngine.java + +import ai.privado.languageEngine.ruby.RubyTestBase.code +import ai.privado.model.SourceCodeModel +import better.files.File +import io.joern.dataflowengineoss.layers.dataflows.{OssDataFlow, OssDataFlowOptions} +import io.joern.javasrc2cpg.{Config, JavaSrc2Cpg} +import io.joern.x2cpg.X2Cpg.applyDefaultOverlays +import io.shiftleft.codepropertygraph.generated.Cpg +import io.shiftleft.semanticcpg.layers.LayerCreatorContext + +object JavaTestBase { + def code(sourceCodes: List[SourceCodeModel], applyPostProcessingPass: Boolean = false): (Cpg, Config) = { + + val (cpg, config) = code(sourceCodes) 
+ + val context = new LayerCreatorContext(cpg) + val options = new OssDataFlowOptions() + new OssDataFlow(options).run(context) + + (cpg, config) + } + + private def code(sourceCodes: List[SourceCodeModel]): (Cpg, Config) = { + val inputDir = File.newTemporaryDirectory() + for (sourceCode <- sourceCodes) { + (inputDir / sourceCode.fileName).write(sourceCode.sourceCode) + } + val outputFile = File.newTemporaryFile() + + val config = Config().withInputPath(inputDir.pathAsString).withOutputPath(outputFile.pathAsString) + val cpg = new JavaSrc2Cpg().createCpg(config).get + applyDefaultOverlays(cpg) + (cpg, config) + } +} diff --git a/src/test/scala/ai/privado/languageEngine/java/passes/config/PropertiesFilePassTest.scala b/src/test/scala/ai/privado/languageEngine/java/passes/config/PropertiesFilePassTest.scala index 158487085..f7096d733 100644 --- a/src/test/scala/ai/privado/languageEngine/java/passes/config/PropertiesFilePassTest.scala +++ b/src/test/scala/ai/privado/languageEngine/java/passes/config/PropertiesFilePassTest.scala @@ -23,7 +23,7 @@ package ai.privado.languageEngine.java.passes.config -import ai.privado.cache.RuleCache +import ai.privado.cache.{AppCache, RuleCache} import ai.privado.languageEngine.java.language.* import ai.privado.model.Language import ai.privado.utility.PropertyParserPass @@ -43,6 +43,7 @@ class AnnotationTests extends PropertiesFilePassTestBase(".properties") { """ |internal.logger.api.base=https://logger.privado.ai/ |slack.base.url=https://hooks.slack.com/services/some/leaking/url + |MY_ENDPOINT=http://myservice.com/user |""".stripMargin override val propertyFileContents = "" @@ -60,7 +61,7 @@ class AnnotationTests extends PropertiesFilePassTestBase(".properties") { | |public AuthenticationService(UserRepository userr, SessionsR sesr, ModelMapper mapper, | ObjectMapper objectMapper, @Qualifier("ApiCaller") ExecutorService apiExecutor, SlackStub slackStub, - | SendGridStub sgStub, @Value("${internal.logger.api.base}") String 
loggerBaseURL) { + | SendGridStub sgStub, @Value("${internal.logger.api.base}") String loggerBaseURL, @Named(Constants.MY_ENDPOINT) String endpoint) { | } | |@Value("${internal.logger.api.base}") @@ -74,25 +75,28 @@ class AnnotationTests extends PropertiesFilePassTestBase(".properties") { "ConfigFilePass" should { "connect annotated parameter to property" in { val anno: List[AstNode] = cpg.property.usedAt.l - anno.length shouldBe 3 + anno.length shouldBe 4 anno.code.l shouldBe List( "@Value(\"${internal.logger.api.base}\") String loggerBaseURL", "java.lang.String loggerUrl", + "@Named(Constants.MY_ENDPOINT) String endpoint", "java.lang.String slackWebHookURL" ) } "connect property to annotated parameter" in { - // cpg.property.usedAt.originalProperty.l.length shouldBe 3 + cpg.property.usedAt.originalProperty.l.length shouldBe 4 cpg.property.usedAt.originalProperty.name.l shouldBe List( "internal.logger.api.base", "internal.logger.api.base", + "MY_ENDPOINT", "slack.base.url" ) cpg.property.usedAt.originalProperty.value.l shouldBe List( "https://logger.privado.ai/", "https://logger.privado.ai/", + "http://myservice.com/user", "https://hooks.slack.com/services/some/leaking/url" ) } @@ -194,6 +198,25 @@ class EgressPropertyTests extends PropertiesFilePassTestBase(".yaml") { |spring: | application: | name: basepath + |false-positive-entries: + | urls: + | - http: + | path1: en-wrapper/0.5.6/maven-wrapper-0.5.6.jar + | path2: che-maven/3.6.3/apache-maven-3.6.3-bin.zip + | path3: dkr.ecr.us-west-2.amazonaws.com/infrastructure/ecr-pusher:latest + | path4: mvn -U -P ${ENVIRONMENT} package -DskipTests --settings ${home}/.m2/settings.xml + | path5: somename.jpg + | path6: somename.png + | path7: somename.gif + | path8: string having html tags

hello

and world + | path9: /a/b/c containing spaces + | path10: github.com/a/b/c + | pathe11: ../some/file/path + | path12: #somecomment + | path13: ///a/b/c + | path14: ./some/file/path + | + | |mx-record-delete: | events: | - http: @@ -208,6 +231,12 @@ class EgressPropertyTests extends PropertiesFilePassTestBase(".yaml") { | - ssm: | path: / | method: PUT + | - privado: + | path: https://code.privado.ai/repositories + | method: PUT + | - privado-without-http: + | path: code.privado.ai/repositories + | method: PUT |""".stripMargin override val codeFileContents = """ @@ -216,12 +245,14 @@ class EgressPropertyTests extends PropertiesFilePassTestBase(".yaml") { override val propertyFileContents = "" - "Fetch egress urls from property files" ignore { + "Fetch egress urls from property files" should { "Check egress urls" in { - val egressExporter = HttpConnectionMetadataExporter(cpg, new RuleCache) - val List(url1, url2) = egressExporter.getEgressUrls + val egressExporter = HttpConnectionMetadataExporter(cpg, new RuleCache) + val List(url1, url2, url3, url4) = egressExporter.getEgressUrls url1 shouldBe "/v1/student/{id}" url2 shouldBe "v1/student/{id}" + url3 shouldBe "https://code.privado.ai/repositories" + url4 shouldBe "code.privado.ai/repositories" } "Check egress urls with single char" in { @@ -338,6 +369,7 @@ abstract class PropertiesFilePassTestBase(fileExtension: String) new PropertyParserPass(cpg, inputDir.toString(), new RuleCache, Language.JAVA).createAndApply() new JavaEnvPropertyLinkerPass(cpg).createAndApply() new JavaAnnotationPropertyLinkerPass(cpg).createAndApply() + AppCache.repoLanguage = Language.JAVA super.beforeAll() } diff --git a/src/test/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByMethodFullNameTaggerTest.scala b/src/test/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByMethodFullNameTaggerTest.scala new file mode 100644 index 000000000..db10fce78 --- /dev/null +++ 
b/src/test/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByMethodFullNameTaggerTest.scala @@ -0,0 +1,74 @@ +package ai.privado.languageEngine.java.tagger.sink.api + +import ai.privado.cache.RuleCache +import ai.privado.entrypoint.PrivadoInput +import org.scalatest.BeforeAndAfterAll +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import ai.privado.languageEngine.java.JavaTestBase.* +import ai.privado.model.{ConfigAndRules, Constants, InternalTag, Language, SourceCodeModel, SystemConfig} +import ai.privado.rule.RuleInfoTestData +import io.shiftleft.semanticcpg.language.* + +class JavaAPISinkByMethodFullNameTaggerTest extends AnyWordSpec with Matchers with BeforeAndAfterAll { + + "call which match api methodFullName regex" should { + "match" in { + + val (cpg, config) = code( + List( + SourceCodeModel( + """ + | + |import java.io.BufferedReader; + |import java.io.IOException; + |import java.io.InputStreamReader; + |import java.net.HttpURLConnection; + |import java.net.URL; + | + |public class HttpRequestExample { + | public static void main(String[] args) { + | try { + | // Specify the URL to send the request to + | URL url = new URL("https://jsonplaceholder.typicode.com/posts/1"); + | + | // Open a connection to the URL + | HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + | + | // Set request method to GET + | connection.setRequestMethod("GET"); + | + | // Get the response code + | int responseCode = connection.getResponseCode(); + | } + | } + |} + |""".stripMargin, + "HttpRequestExample.java" + ) + ) + ) + + val ruleCache = RuleCache() + ruleCache.setRule( + ConfigAndRules(systemConfig = + List( + SystemConfig( + Constants.apiMethodFullNames, + "java.net.HttpURLConnection.*", + Language.UNKNOWN, + "", + Array[String]() + ) + ) + ) + ) + JavaAPISinkTagger.applyTagger(cpg, ruleCache = ruleCache, privadoInput = PrivadoInput()) + + val apiSinks = cpg.call("getResponseCode").l + + 
apiSinks.tag.nameExact(InternalTag.API_SINK_MARKED.toString).size shouldBe 1 + } + } + +} diff --git a/src/test/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByParameterTaggerTest.scala b/src/test/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByParameterTaggerTest.scala new file mode 100644 index 000000000..90478f3e9 --- /dev/null +++ b/src/test/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkByParameterTaggerTest.scala @@ -0,0 +1,103 @@ +package ai.privado.languageEngine.java.tagger.sink.api + +import ai.privado.cache.RuleCache +import ai.privado.entrypoint.PrivadoInput +import org.scalatest.BeforeAndAfterAll +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import ai.privado.languageEngine.java.JavaTestBase.* +import ai.privado.languageEngine.java.tagger.sink.JavaAPITagger +import ai.privado.model.{CatLevelOne, Constants, InternalTag, Language, NodeType, SourceCodeModel, SystemConfig} +import ai.privado.rule.RuleInfoTestData +import io.shiftleft.semanticcpg.language.* + +class JavaAPISinkByParameterTaggerTest extends AnyWordSpec with Matchers with BeforeAndAfterAll { + + "Api by matching a variable like parameter" should { + "be tagged as a API sink" in { + + val (cpg, config) = code( + List( + SourceCodeModel( + """ + |import java.util.List; + |import ai.privado.client.Client; + | + |public class EndpointClient { + | + | private String config; + | + | public EndpointClient(String config) { + | this.config = config; + | } + | + | + | public Client getClient(String endpoint) { + | // Logic to create and return a client based on the endpoint + | Client client = new Client(endpoint); + | return client; + | } + |} + | + |""".stripMargin, + "EndpointClient.java" + ), + SourceCodeModel( + """ + |import java.util.List; + |import ai.privado.client.Client; + | + |public class Main { + | private Client client; + | + | private EndpointClient endpointClient; + | + | public 
Main(String endpoint, String config) { + | this.client = new Client(); + | this.endpointClient = new EndpointClient(config); + | } + | + | public List getAllDetails() { + | + | + | return client.getAllDetails(); // This should be marked as API Sink by url like matching + | } + | + | public List getDetailsByEndpoint() { + | String url = "https://www.myproduction.com/user/endpoint"; + | + | return endpointClient.getDetailsByEndpoint(url); // This should be marked as API Sink by config like matching + | + | } + |} + |""".stripMargin, + "Main.java" + ) + ) + ) + + val privadoInput = PrivadoInput(enableAPIByParameter = true) + val ruleCache = RuleCache() + val systemConfig = + List(SystemConfig(Constants.apiIdentifier, "(?i).*endpoint.*", Language.UNKNOWN, "", Array[String]())) + ruleCache.setRule(RuleInfoTestData.rule.copy(systemConfig = systemConfig)) + JavaAPISinkTagger.applyTagger(cpg, ruleCache = ruleCache, privadoInput = privadoInput) + + new JavaAPITagger(cpg, ruleCache, privadoInputConfig = privadoInput).createAndApply() + + val apiSink = cpg.call("getAllDetails").l + apiSink.tag.nameExact(InternalTag.API_SINK_MARKED.toString).size shouldBe 1 + apiSink.tag.nameExact(Constants.catLevelOne).value.headOption shouldBe Some(CatLevelOne.SINKS.name) + apiSink.tag.nameExact(Constants.nodeType).value.headOption shouldBe Some(NodeType.API.toString) + + apiSink.tag.nameExact(Constants.id).value.l shouldBe List("Sinks.ThirdParties.API.endpoint") + apiSink.tag.nameExact(Constants.apiUrl + "Sinks.ThirdParties.API.endpoint").value.l shouldBe List("endpoint") + + val apiSinkByEndpoint = cpg.call("getDetailsByEndpoint").l + apiSinkByEndpoint.tag.nameExact(InternalTag.API_SINK_MARKED.toString).size shouldBe 1 + apiSinkByEndpoint.tag.nameExact(Constants.catLevelOne).value.headOption shouldBe Some(CatLevelOne.SINKS.name) + apiSinkByEndpoint.tag.nameExact(Constants.nodeType).value.headOption shouldBe Some(NodeType.API.toString) + } + } + +} diff --git 
a/src/test/scala/ai/privado/languageEngine/php/PhpTestBase.scala b/src/test/scala/ai/privado/languageEngine/php/PhpTestBase.scala new file mode 100644 index 000000000..5c0091bef --- /dev/null +++ b/src/test/scala/ai/privado/languageEngine/php/PhpTestBase.scala @@ -0,0 +1,92 @@ +/* + * This file is part of Privado OSS. + * + * Privado is an open source static code analysis tool to discover data flows in the code. + * Copyright (C) 2022 Privado, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * + * For more information, contact support@privado.ai + * + */ + +package ai.privado.languageEngine.php + +import ai.privado.rule.RuleInfoTestData +import ai.privado.cache.* +import ai.privado.entrypoint.PrivadoInput +import ai.privado.languageEngine.php.processor.PhpProcessor +import ai.privado.languageEngine.php.tagger.source.IdentifierTagger +import ai.privado.model.* +import ai.privado.tagger.source.LiteralTagger +import ai.privado.threatEngine.ThreatEngineExecutor +import better.files.File +import io.joern.php2cpg.{Config, Php2Cpg} +import io.joern.x2cpg.X2Cpg +import io.shiftleft.codepropertygraph.generated.Cpg +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} + +import scala.collection.mutable + +abstract class PhpTestBase extends AnyWordSpec with Matchers with BeforeAndAfterAll with BeforeAndAfterEach { + private val cpgs = mutable.ArrayBuffer.empty[Cpg] + private val outPutFiles = mutable.ArrayBuffer.empty[File] + private val inputDirs = mutable.ArrayBuffer.empty[File] + + val taggerCache = new TaggerCache() + + val configAndRules: ConfigAndRules = + ConfigAndRules(RuleInfoTestData.sourceRule, List(), List(), List(), List(), List(), List(), List(), List(), List()) + + def code(code: String): (Cpg, ThreatEngineExecutor) = { + val ruleCache = new RuleCache() + val auditCache = new AuditCache() + val privadoInput = PrivadoInput() + val dataFlowCache = new DataFlowCache(privadoInput, auditCache) + + val inputDir = File.newTemporaryDirectory() + inputDirs.addOne(inputDir) + (inputDir / "main.php").write(code) + + val outputFile: File = File.newTemporaryFile() + outPutFiles.addOne(outputFile) + val config = Config() + .withInputPath(inputDir.pathAsString) + .withOutputPath(outputFile.pathAsString) + .withPhpParserBin(PhpProcessor.parserBinPath) + + ruleCache.setRule(configAndRules) + val cpg = new Php2Cpg().createCpg(config).get + AppCache.repoLanguage = 
Language.PHP + + X2Cpg.applyDefaultOverlays(cpg) + Php2Cpg.postProcessingPasses(cpg).foreach(_.createAndApply()) + new IdentifierTagger(cpg, ruleCache, taggerCache).createAndApply() + new LiteralTagger(cpg, ruleCache).createAndApply() + + cpgs.addOne(cpg) + val threatEngine = + new ThreatEngineExecutor( + cpg, + config.inputPath, + ruleCache, + null, + dataFlowCache.getDataflowAfterDedup, + privadoInput + ) + (cpg, threatEngine) + } +} diff --git a/src/test/scala/ai/privado/languageEngine/php/tagger/source/FieldIdentifierTaggingTests.scala b/src/test/scala/ai/privado/languageEngine/php/tagger/source/FieldIdentifierTaggingTests.scala new file mode 100644 index 000000000..e2f28bc21 --- /dev/null +++ b/src/test/scala/ai/privado/languageEngine/php/tagger/source/FieldIdentifierTaggingTests.scala @@ -0,0 +1,54 @@ +/* + * This file is part of Privado OSS. + * + * Privado is an open source static code analysis tool to discover data flows in the code. + * Copyright (C) 2022 Privado, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * + * For more information, contact support@privado.ai + * + */ +package ai.privado.languageEngine.php.tagger.source + +import ai.privado.languageEngine.php.PhpTestBase +import ai.privado.model.* +import io.shiftleft.semanticcpg.language.* + +class FieldIdentifierTaggingTests extends PhpTestBase { + "Field access in code" should { + "be tagged as part of identifier tagger" in { + val (cpg, _) = code(""" + |firstName = "John"; + | add_phone("phone"); + | } + | + | function add_phone($ph) { + | // + | } + |} + |?> + |""".stripMargin) + + val List(firstNameField) = cpg.fieldAccess.l + firstNameField.code shouldBe "$this->firstName" + firstNameField.tag.nameExact(Constants.catLevelOne).value.l shouldBe List(CatLevelOne.SOURCES.name) + } + } +} diff --git a/src/test/scala/ai/privado/languageEngine/php/tagger/source/IdentifierTaggingTest.scala b/src/test/scala/ai/privado/languageEngine/php/tagger/source/IdentifierTaggingTest.scala new file mode 100644 index 000000000..12c902adf --- /dev/null +++ b/src/test/scala/ai/privado/languageEngine/php/tagger/source/IdentifierTaggingTest.scala @@ -0,0 +1,74 @@ +/* + * This file is part of Privado OSS. + * + * Privado is an open source static code analysis tool to discover data flows in the code. + * Copyright (C) 2022 Privado, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * + * For more information, contact support@privado.ai + * + */ +package ai.privado.languageEngine.php.tagger.source + +import ai.privado.languageEngine.php.PhpTestBase +import ai.privado.model.* +import io.shiftleft.semanticcpg.language.* + +class IdentifierTaggingTest extends PhpTestBase { + "Tagging derived sources" should { + val (cpg, _) = code(""" + |firstName = $fname; + | $this->lastName = $lname; + | $this->age = $userAge; + | $this->email = $userEmail; + | $this->dob = $userDob; + | } + | } + | + | $user = new User("a", "b", 1, "c@d.com", "01-01-90"); + | echo $user->firstName; + |?> + | + |""".stripMargin) + + "tag member in a structure" in { + cpg.member("firstName").tag.nameExact(Constants.id).value.l shouldBe List("Data.Sensitive.FirstName") + + cpg.member("dob").tag.nameExact(Constants.id).value.l shouldBe List( + "Data.Sensitive.PersonalIdentification.DateofBirth" + ) + } + + "be tagged as part of identifier tagger" in { + val userObj = cpg.identifier("user").lineNumber(20).l + userObj.tag + .where(_.nameExact(InternalTag.OBJECT_OF_SENSITIVE_CLASS_BY_MEMBER_NAME.toString)) + .value + .head shouldBe "Data.Sensitive.FirstName" + userObj.tag.where(_.nameExact(Constants.id)).size shouldBe 1 + userObj.tag.where(_.nameExact(Constants.catLevelOne)).value.l shouldBe List(CatLevelOne.DERIVED_SOURCES.name) + } + } +} diff --git a/src/test/scala/ai/privado/languageEngine/php/tagger/source/LiteralTaggingTests.scala b/src/test/scala/ai/privado/languageEngine/php/tagger/source/LiteralTaggingTests.scala new file mode 100644 index 000000000..f62b21773 --- /dev/null +++ b/src/test/scala/ai/privado/languageEngine/php/tagger/source/LiteralTaggingTests.scala @@ -0,0 +1,50 @@ +/* + * This file is part of Privado OSS. + * + * Privado is an open source static code analysis tool to discover data flows in the code. + * Copyright (C) 2022 Privado, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * For more information, contact support@privado.ai + * + */ +package ai.privado.languageEngine.php.tagger.source + +import ai.privado.languageEngine.php.PhpTestBase +import ai.privado.model.* +import io.shiftleft.semanticcpg.language.* + +class LiteralTaggingTests extends PhpTestBase { + "Literals in code" should { + "be tagged as part of LiteralTagger" in { + val (cpg, _) = code(""" + | + |""".stripMargin) + + val literals = cpg.literal.l + literals.last.code shouldBe "\"phone\"" + literals.last.tag.nameExact(Constants.catLevelOne).value.l shouldBe List(CatLevelOne.SOURCES.name) + } + } +} diff --git a/src/test/scala/ai/privado/languageEngine/ruby/monolith/MonolithTest.scala b/src/test/scala/ai/privado/languageEngine/ruby/monolith/MonolithTest.scala index 5cd0cb1dd..ac67a85d8 100644 --- a/src/test/scala/ai/privado/languageEngine/ruby/monolith/MonolithTest.scala +++ b/src/test/scala/ai/privado/languageEngine/ruby/monolith/MonolithTest.scala @@ -1,6 +1,5 @@ package ai.privado.languageEngine.ruby.monolith -import ai.privado.RuleInfoTestData import ai.privado.cache.{AppCache, AuditCache, DataFlowCache, RuleCache, S3DatabaseDetailsCache, TaggerCache} import ai.privado.dataflow.Dataflow import ai.privado.entrypoint.PrivadoInput @@ -9,6 +8,7 @@ import ai.privado.languageEngine.base.processor.BaseProcessor import
ai.privado.languageEngine.go.tagger.source.IdentifierTagger import ai.privado.languageEngine.ruby.tagger.monolith.MonolithTagger import ai.privado.model.{Constants, Language} +import ai.privado.rule.RuleInfoTestData import ai.privado.utility.PropertyParserPass import better.files.File import io.joern.dataflowengineoss.language.Path diff --git a/src/test/scala/ai/privado/languageEngine/ruby/passes/SchemaParserTest.scala b/src/test/scala/ai/privado/languageEngine/ruby/passes/SchemaParserTest.scala index e78c8d71a..b643ca432 100644 --- a/src/test/scala/ai/privado/languageEngine/ruby/passes/SchemaParserTest.scala +++ b/src/test/scala/ai/privado/languageEngine/ruby/passes/SchemaParserTest.scala @@ -1,6 +1,5 @@ package ai.privado.languageEngine.ruby.passes -import ai.privado.RuleInfoTestData import ai.privado.cache.RuleCache import ai.privado.languageEngine.ruby.RubyTestBase.* import ai.privado.languageEngine.ruby.passes.SchemaParser @@ -19,6 +18,7 @@ import org.scalatest.BeforeAndAfterAll import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec import ai.privado.model.SourceCodeModel +import ai.privado.rule.RuleInfoTestData class SchemaParserTest extends AnyWordSpec with Matchers with BeforeAndAfterAll { diff --git a/src/test/scala/ai/privado/languageEngine/ruby/tagger/source/RubyLiteralDerivedTaggerTest.scala b/src/test/scala/ai/privado/languageEngine/ruby/tagger/source/RubyLiteralDerivedTaggerTest.scala index 63e89d8ee..2107a8ef5 100644 --- a/src/test/scala/ai/privado/languageEngine/ruby/tagger/source/RubyLiteralDerivedTaggerTest.scala +++ b/src/test/scala/ai/privado/languageEngine/ruby/tagger/source/RubyLiteralDerivedTaggerTest.scala @@ -1,6 +1,5 @@ package ai.privado.languageEngine.ruby.tagger.source -import ai.privado.RuleInfoTestData import ai.privado.cache.RuleCache import ai.privado.languageEngine.ruby.RubyTestBase.* import ai.privado.languageEngine.ruby.passes.SchemaParser @@ -19,6 +18,7 @@ import org.scalatest.BeforeAndAfterAll 
import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec import ai.privado.model.SourceCodeModel +import ai.privado.rule.RuleInfoTestData class RubyLiteralDerivedTaggerTest extends AnyWordSpec with Matchers with BeforeAndAfterAll { diff --git a/src/test/scala/ai/privado/model/SourceCodeModel.scala b/src/test/scala/ai/privado/model/SourceCodeModel.scala index 11f52ce26..8b62c3ddd 100644 --- a/src/test/scala/ai/privado/model/SourceCodeModel.scala +++ b/src/test/scala/ai/privado/model/SourceCodeModel.scala @@ -1,3 +1,2 @@ package ai.privado.model - case class SourceCodeModel(sourceCode: String, fileName: String) diff --git a/src/test/scala/ai/privado/rule/RuleInfoTestData.scala b/src/test/scala/ai/privado/rule/RuleInfoTestData.scala new file mode 100644 index 000000000..0f907f151 --- /dev/null +++ b/src/test/scala/ai/privado/rule/RuleInfoTestData.scala @@ -0,0 +1,23 @@ +package ai.privado.rule + +import ai.privado.model.* +import ai.privado.rule.SourceRuleTestData._ +import ai.privado.rule.SinkRuleTestData._ + +object RuleInfoTestData { + + val sourceRule = List( + firstNameSourceRule, + accountPasswordSourceRule, + lastNameSourceRule, + dobSourceRule, + emailSourceRule, + phoneNumberSourceRule, + salarySourceRule + ) + + val sinkRule = List(thirdPartyAPIRule) + + val rule: ConfigAndRules = + ConfigAndRules(sources = sourceRule, sinks = sinkRule) +} diff --git a/src/test/scala/ai/privado/rule/SinkRuleTestData.scala b/src/test/scala/ai/privado/rule/SinkRuleTestData.scala new file mode 100644 index 000000000..d8c51864b --- /dev/null +++ b/src/test/scala/ai/privado/rule/SinkRuleTestData.scala @@ -0,0 +1,27 @@ +package ai.privado.rule + +import ai.privado.model.{CatLevelOne, Constants, FilterProperty, Language, NodeType, RuleInfo} + +object SinkRuleTestData { + + val thirdPartyAPIRule = RuleInfo( + Constants.thirdPartiesAPIRuleId, + "Third Party API", + "", + FilterProperty.METHOD_FULL_NAME, + Array(), + List( + 
"((?i)((?:http:|https:|ftp:|ssh:|udp:|wss:){0,1}(\\/){0,2}[a-zA-Z0-9_-][^)\\/(#|,!>\\s]{1,50}\\.(?:com|net|org|de|in|uk|us|io|gov|cn|ml|ai|ly|dev|cloud|me|icu|ru|info|top|tk|tr|cn|ga|cf|nl)).*(?