
Merge pull request #874 from Privado-Inc/dev
Dev
khemrajrathore authored Dec 12, 2023
2 parents 122b3ea + a0ddd1a commit 7fafe80
Showing 49 changed files with 887 additions and 331 deletions.
README.md: 2 changes (1 addition, 1 deletion)
@@ -1,7 +1,7 @@
Privado Core
=============================================

-Branch structure
+Branch structure

main - This branch will contain the released version of the code.

Expand Down
build.sbt: 8 changes (4 additions, 4 deletions)
@@ -7,7 +7,7 @@ ThisBuild / version := sys.env.getOrElse("BUILD_VERSION", "dev-SNAPSHOT")
// parsed by project/Versions.scala, updated by updateDependencies.sh

val cpgVersion = "1.4.32"
val joernVersion = "2.0.181"
val joernVersion = "2.0.189"
val overflowdbVersion = "1.181"
val requests = "0.8.0"
val upickle = "3.1.2"
@@ -52,7 +52,7 @@ libraryDependencies ++= Seq(
"com.fasterxml.jackson.dataformat" % "jackson-dataformat-yaml" % jacksonVersion exclude ("org.yaml", "snakeyaml"),
"com.github.wnameless.json" % "json-flattener" % "0.14.0",
"org.apache.logging.log4j" % "log4j-core" % "2.19.0",
"org.apache.logging.log4j" % "log4j-slf4j2-impl" % "2.19.0" % Runtime,
"org.apache.logging.log4j" % "log4j-slf4j2-impl" % "2.19.0",
"org.apache.poi" % "poi-ooxml" % "5.2.2",
"com.github.jsqlparser" % "jsqlparser" % "4.6",
"org.apache.maven" % "maven-model" % "3.9.0",
@@ -124,7 +124,7 @@ goAstGenBinaryNames := { Seq(GoAstgenWin, GoAstgenLinux, GoAstgenLinuxArm, GoAst

lazy val goAstGenDlTask = taskKey[Unit](s"Download goastgen binaries")
goAstGenDlTask := {
-val goAstGenDir = baseDirectory.value / "bin" / "goastgen"
+val goAstGenDir = baseDirectory.value / "bin" / "astgen"
goAstGenDir.mkdirs()

goAstGenBinaryNames.value.foreach { fileName =>
@@ -136,7 +136,7 @@ goAstGenDlTask := {
}
}

-val distDir = (Universal / stagingDirectory).value / "bin" / "goastgen"
+val distDir = (Universal / stagingDirectory).value / "bin" / "astgen"
distDir.mkdirs()
IO.copyDirectory(goAstGenDir, distDir)

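A note on the logging change above: dropping the % Runtime qualifier moves log4j-slf4j2-impl from the runtime-only classpath onto the default (compile) scope, so the SLF4J binding is also visible at compile time. A minimal sbt sketch of the distinction, assuming an otherwise plain build:

    // Runtime-scoped: present when the application runs, invisible to the compiler.
    libraryDependencies += "org.apache.logging.log4j" % "log4j-slf4j2-impl" % "2.19.0" % Runtime

    // Default (Compile) scope: on both the compile and the runtime classpath.
    libraryDependencies += "org.apache.logging.log4j" % "log4j-slf4j2-impl" % "2.19.0"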
log4j2.xml: 2 changes (1 addition, 1 deletion)
@@ -2,7 +2,7 @@
<Configuration status="WARN">
<Appenders>
<Console name="Console" target="SYSTEM_ERR">
-<PatternLayout pattern="%d{yyy-MM-dd HH:mm:ss.SSS} %p %logger{36} %c{0}: %msg%n"/>
+<PatternLayout pattern="%d{yyy-MM-dd HH:mm:ss.SSS} %p %logger{36} %F:%L: %msg%n"/>
</Console>
</Appenders>
<Loggers>
src/main/resources/log4j2.xml: 2 changes (1 addition, 1 deletion)
@@ -2,7 +2,7 @@
<Configuration status="WARN">
<Appenders>
<Console name="Console" target="SYSTEM_ERR">
-<PatternLayout pattern="%d{yyy-MM-dd HH:mm:ss.SSS} %p %c{0}: %msg%n"/>
+<PatternLayout pattern="%d{yyy-MM-dd HH:mm:ss.SSS} %p %logger{36} %F:%L: %msg%n"/>
</Console>
</Appenders>
<Loggers>
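Both log4j2.xml files above move to the same pattern: %F:%L appends the source file name and line number of each log call, which disambiguates messages when several call sites share a logger, at the cost of Log4j computing location information per event (a documented overhead of location-based patterns). A small self-contained sketch, with the rendered lines shown as illustrative comments rather than captured output:

    import org.slf4j.LoggerFactory

    object PatternDemo {
      private val logger = LoggerFactory.getLogger(getClass)

      def main(args: Array[String]): Unit =
        // Rendered under the new pattern, roughly:
        //   2023-12-12 10:15:04.123 INFO PatternDemo$ PatternDemo.scala:9: starting scan
        logger.info("starting scan")
    }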
src/main/scala/ai/privado/audit/DataFlowReport.scala: 2 changes (1 addition, 1 deletion)
@@ -1,6 +1,6 @@
package ai.privado.audit

-import ai.privado.cache.{AuditCache, DataFlowCache}
+import ai.privado.cache.AuditCache

import scala.collection.mutable.ListBuffer

src/main/scala/ai/privado/cache/AuditCache.scala: 8 changes (2 additions, 6 deletions)
@@ -138,9 +138,7 @@ class AuditCache {
dataflowMapByPathId
}

-def addIntoBeforeFirstDedup(
-dataFlow: mutable.HashMap[String, mutable.HashMap[String, ListBuffer[DataFlowPathModel]]]
-): Unit = {
+def addIntoBeforeFirstDedup(dataFlow: Map[String, mutable.HashMap[String, ListBuffer[DataFlowPathModel]]]): Unit = {
dataFlow.foreach(flow => {
flow._2.foreach(fileInfo => {
fileInfo._2.foreach(dataflowModel => {
@@ -156,9 +154,7 @@ class AuditCache {
def checkFlowExistInFirstDedup(sourcePathInfo: SourcePathInfo): Boolean =
flowPathBeforeFirstDedup.contains(sourcePathInfo)

-def addIntoBeforeSecondDedup(
-dataFlow: mutable.HashMap[String, mutable.HashMap[String, ListBuffer[DataFlowPathModel]]]
-): Unit = {
+def addIntoBeforeSecondDedup(dataFlow: Map[String, mutable.HashMap[String, ListBuffer[DataFlowPathModel]]]): Unit = {
dataFlow.foreach(flow => {
flow._2.foreach(fileInfo => {
fileInfo._2.foreach(dataflowModel => {
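Both signature changes above widen the dataFlow parameter from mutable.HashMap to the read-only Map interface, which lets callers hand over a .toMap snapshot of the internal cache (DataFlowCache does exactly that with dataflow.toMap further down). A minimal, self-contained sketch of the idea, with hypothetical names:

    import scala.collection.mutable

    // The callee only reads, so the immutable Map interface is enough.
    def countEntries(data: Map[String, mutable.ListBuffer[Int]]): Int =
      data.valuesIterator.map(_.size).sum

    val cache = mutable.HashMap("a" -> mutable.ListBuffer(1, 2), "b" -> mutable.ListBuffer(3))
    println(countEntries(cache.toMap)) // 3; the snapshot is shallow, the inner buffers stay shared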
src/main/scala/ai/privado/cache/DataFlowCache.scala: 73 changes (43 additions, 30 deletions)
@@ -23,7 +23,7 @@
package ai.privado.cache

import ai.privado.dataflow.DuplicateFlowProcessor
-import ai.privado.entrypoint.{PrivadoInput, ScanProcessor}
+import ai.privado.entrypoint.PrivadoInput
import ai.privado.model.exporter.{
DataFlowPathIntermediateModel,
DataFlowSinkIntermediateModel,
@@ -40,21 +40,20 @@ import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap}
import scala.collection.mutable
import scala.collection.mutable.ListBuffer

-class DataFlowCache(auditCache: AuditCache) {
+class DataFlowCache(privadoInput: PrivadoInput, auditCache: AuditCache) {

val dataflowsMapByType: ConcurrentMap[String, Path] = new ConcurrentHashMap[String, Path]()

-val dataflow = mutable.HashMap[String, mutable.HashMap[String, ListBuffer[DataFlowPathModel]]]()
+// Contains all the dataflow mappings, before any deduplication is applied
+private val dataflow: mutable.HashMap[String, mutable.HashMap[String, ListBuffer[DataFlowPathModel]]] =
+mutable.HashMap[String, mutable.HashMap[String, ListBuffer[DataFlowPathModel]]]()

-lazy val finalDataflow: List[DataFlowPathModel] = {
-val extraFlows = {
-if (!ScanProcessor.config.disableDeDuplication)
-setDataflowWithdedupAndReturnDataflowsWithApplyDedupFalse(auditCache)
-else
-List()
-}
-
-dataflow.flatMap(_._2.values.flatMap(_.toList)).toList ::: extraFlows
+// Contains all the dataflows, deduplicated or not depending on the dedup flag
+private lazy val finalDataflow: List[DataFlowPathModel] = {
+if (!privadoInput.disableDeDuplication)
+DataFlowCache.getDataflowAfterApplyingDedupLogic(privadoInput, auditCache, dataflowsMapByType, dataflow.toMap)
+else
+dataflow.flatMap(_._2.values.flatMap(_.toList)).toList
}

var intermediateDataFlow: List[DataFlowPathIntermediateModel] = List[DataFlowPathIntermediateModel]()
@@ -76,7 +75,8 @@ class DataFlowCache(auditCache: AuditCache) {
dataflow(sourceId)(fileLineNo).append(dataFlowPathModel)
}

-def getDataflow: List[DataFlowPathModel] = finalDataflow
+def getDataflowAfterDedup: List[DataFlowPathModel] = finalDataflow
+def getDataflowBeforeDedup: List[DataFlowPathModel] = dataflow.flatMap(_._2.values.flatMap(_.toList)).toList

def getIntermediateDataFlow(): List[DataFlowPathIntermediateModel] = intermediateDataFlow

@@ -100,9 +100,25 @@ class DataFlowCache(auditCache: AuditCache) {
intermediateSourceResult.toList
}

-private def setDataflowWithdedupAndReturnDataflowsWithApplyDedupFalse(auditCache: AuditCache) = {
+}
+
+object DataFlowCache {
+
+/** Returns all the dataflows after applying the dedup logic
+ *
+ * @param privadoInput scan configuration, consulted for the audit-report flag
+ * @param auditCache cache that records the pre-dedup snapshots for the audit report
+ * @param dataflowsMapByType map from path id to the corresponding dataflow path
+ * @param dataflow all collected dataflows, keyed by source id and file/line-number key
+ * @return the deduplicated dataflows plus the flows exempt from dedup
+ */
+def getDataflowAfterApplyingDedupLogic(
+privadoInput: PrivadoInput,
+auditCache: AuditCache,
+dataflowsMapByType: ConcurrentMap[String, Path],
+dataflow: Map[String, mutable.HashMap[String, ListBuffer[DataFlowPathModel]]]
+) = {
+
println(s"${Calendar.getInstance().getTime} - Deduplicating data flows...")
+val dataflowAfterDedup = mutable.HashMap[String, mutable.HashMap[String, ListBuffer[DataFlowPathModel]]]()

def addToMap(dataFlowPathModel: DataFlowPathModel): Unit = {

val pathId = dataFlowPathModel.pathId
@@ -112,32 +128,32 @@ class DataFlowCache(auditCache: AuditCache) {
val flowSize = dataflowsMapByType.get(pathId).elements.size
val sourceId = dataFlowPathModel.sourceId

-if (!dataflow.contains(sourceId)) {
-dataflow(sourceId) = new mutable.HashMap().addOne(fileLineNo, ListBuffer())
-} else if (!dataflow(sourceId).contains(fileLineNo)) {
-dataflow(sourceId)(fileLineNo) = ListBuffer()
+if (!dataflowAfterDedup.contains(sourceId)) {
+dataflowAfterDedup(sourceId) = new mutable.HashMap().addOne(fileLineNo, ListBuffer())
+} else if (!dataflowAfterDedup(sourceId).contains(fileLineNo)) {
+dataflowAfterDedup(sourceId)(fileLineNo) = ListBuffer()
}

-if (dataflow(sourceId)(fileLineNo).nonEmpty) {
-val currentDataFlowPathModel = dataflow(sourceId)(fileLineNo).head
+if (dataflowAfterDedup(sourceId)(fileLineNo).nonEmpty) {
+val currentDataFlowPathModel = dataflowAfterDedup(sourceId)(fileLineNo).head
val currentPathId = currentDataFlowPathModel.pathId
val currentSinkNodeWithLocation = dataflowsMapByType.get(currentPathId).elements.last.location

val currentFileLineNo =
currentSinkNodeWithLocation.lineNumber
.getOrElse(0)
.toString + currentSinkNodeWithLocation.filename + currentDataFlowPathModel.sinkId
-val currentFlowSize = dataflowsMapByType.get(dataflow(sourceId)(fileLineNo).head.pathId).elements.size
+val currentFlowSize = dataflowsMapByType.get(dataflowAfterDedup(sourceId)(fileLineNo).head.pathId).elements.size
if (currentFileLineNo.equals(fileLineNo) && flowSize < currentFlowSize) {
-dataflow(sourceId)(fileLineNo) = ListBuffer[DataFlowPathModel](dataFlowPathModel)
+dataflowAfterDedup(sourceId)(fileLineNo) = ListBuffer[DataFlowPathModel](dataFlowPathModel)
}
} else {
-dataflow(sourceId)(fileLineNo) = ListBuffer[DataFlowPathModel](dataFlowPathModel)
+dataflowAfterDedup(sourceId)(fileLineNo) = ListBuffer[DataFlowPathModel](dataFlowPathModel)
}

}

-if (ScanProcessor.config.generateAuditReport) {
+if (privadoInput.generateAuditReport) {
auditCache.addIntoBeforeFirstDedup(dataflow)
}

@@ -152,11 +168,9 @@ class DataFlowCache(auditCache: AuditCache) {
})
(sourceId, filteredFileLineNumberMap)
})
-val flowsWithAppyDataflowFalse = dataflow.flatMap(_._2.values.flatMap(_.filterNot(_.applyDedup).toList)).toList
-// clear the content and set fresh content
-dataflow.clear()
+val flowsWithApplyDedupFalse = dataflow.flatMap(_._2.values.flatMap(_.filterNot(_.applyDedup).toList)).toList

-if (ScanProcessor.config.generateAuditReport) {
+if (privadoInput.generateAuditReport) {
auditCache.addIntoBeforeSecondDedup(filteredSourceIdMap)
}

@@ -165,7 +179,6 @@ class DataFlowCache(auditCache: AuditCache) {
fileLineNoEntry._2.foreach(dfpm => addToMap(dfpm))
})
})
-flowsWithAppyDataflowFalse
+dataflowAfterDedup.flatMap(_._2.values.flatMap(_.toList)).toList ::: flowsWithApplyDedupFalse
}

}
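To summarise the dedup rule implemented above: flows are grouped by source id plus a sink fingerprint (sink line number + file name + sink id), only the shortest path in each group survives, and flows created with applyDedup == false bypass the filter and are appended to the result. A distilled, self-contained sketch of that rule, with simplified types and hypothetical names:

    import scala.collection.mutable

    final case class Flow(sourceId: String, sinkFingerprint: String, pathLength: Int, applyDedup: Boolean = true)

    def dedup(flows: List[Flow]): List[Flow] = {
      val (candidates, exempt) = flows.partition(_.applyDedup)
      val shortest = mutable.HashMap[(String, String), Flow]()
      candidates.foreach { flow =>
        val key = (flow.sourceId, flow.sinkFingerprint)
        shortest.get(key) match {
          case Some(kept) if kept.pathLength <= flow.pathLength => // the kept flow is already shorter
          case _                                                => shortest(key) = flow
        }
      }
      shortest.values.toList ::: exempt
    }

    // dedup(List(Flow("s1", "sink@42", 5), Flow("s1", "sink@42", 3), Flow("s1", "sink@42", 9, applyDedup = false)))
    // keeps the length-3 flow plus the exempt length-9 one.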
src/main/scala/ai/privado/dataflow/Dataflow.scala: 8 changes (3 additions, 5 deletions)
@@ -175,14 +175,12 @@ class Dataflow(cpg: Cpg) {
dataFlowCache,
auditCache
)
-println(
-s"${TimeMetric.getNewTime()} - --Filtering flows 2 is done in \t\t\t- ${TimeMetric
-.setNewTimeToStageLastAndGetTimeDiff()} - Final flows - ${dataFlowCache.dataflow.values.flatMap(_.values).flatten.size}"
-)
+println(s"${TimeMetric.getNewTime()} - --Filtering flows 2 is done in \t\t\t- ${TimeMetric
+.setNewTimeToStageLastAndGetTimeDiff()} - Final flows - ${dataFlowCache.getDataflowBeforeDedup.size}")
}
// Need to return the filtered result
println(s"${Calendar.getInstance().getTime} - --Deduplicating flows invoked...")
-val dataflowFromCache = dataFlowCache.getDataflow
+val dataflowFromCache = dataFlowCache.getDataflowAfterDedup
println(s"${TimeMetric.getNewTime()} - --Deduplicating flows is done in \t\t- ${TimeMetric
.setNewTimeToStageLastAndGetTimeDiff()} - Unique flows - ${dataflowFromCache.size}")
auditCache.addIntoFinalPath(dataflowFromCache)
src/main/scala/ai/privado/entrypoint/CommandParser.scala: 8 changes (7 additions, 1 deletion)
@@ -53,7 +53,8 @@ case class PrivadoInput(
enableAuditSemanticsFilter: Boolean = false,
limitNoSinksForDataflows: Int = -1,
limitArgExpansionDataflows: Int = -1,
-offlineMode: Boolean = false
+offlineMode: Boolean = false,
+isMonolith: Boolean = false
)

object CommandConstants {
@@ -103,6 +104,7 @@ object CommandConstants {
val LIMIT_ARG_EXPANSION_FOR_DATAFLOWS_ABBR = "laefd"
val OFFLINE_MODE = "offline-mode"
val OFFLINE_MODE_ABBR = "om"
+val IS_MONOLITH = "monolith"
}

object CommandParser {
@@ -173,6 +175,10 @@ object CommandParser {
.optional()
.action((_, c) => c.copy(offlineMode = true))
.text("Offline mode"),
+opt[Unit](CommandConstants.IS_MONOLITH)
+.optional()
+.action((_, c) => c.copy(isMonolith = true))
+.text("Split repository as a monolith repo"),
opt[Unit](CommandConstants.DISABLE_2ND_LEVEL_CLOSURE)
.abbr(CommandConstants.DISABLE_2ND_LEVEL_CLOSURE_ABBR)
.optional()
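Because --monolith is declared as opt[Unit], its mere presence on the command line flips isMonolith; the flag takes no argument. A standalone sketch of the mechanics using scopt's builder API (trimmed config, program name hypothetical):

    import scopt.OParser

    final case class Conf(isMonolith: Boolean = false)

    object MonolithFlagDemo {
      private val builder = OParser.builder[Conf]
      private val parser = {
        import builder._
        OParser.sequence(
          programName("privado-core"),
          opt[Unit]("monolith")
            .optional()
            .action((_, c) => c.copy(isMonolith = true))
            .text("Split repository as a monolith repo")
        )
      }

      def main(args: Array[String]): Unit = {
        println(OParser.parse(parser, Array("--monolith"), Conf())) // Some(Conf(true))
        println(OParser.parse(parser, Array.empty[String], Conf())) // Some(Conf(false))
      }
    }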
src/main/scala/ai/privado/entrypoint/ScanProcessor.scala: 3 changes (2 additions, 1 deletion)
@@ -318,7 +318,7 @@ object ScanProcessor extends CommandProcessor {

private val auditCache = new AuditCache
private def getDataflowCache: DataFlowCache = {
-new DataFlowCache(auditCache)
+new DataFlowCache(config, auditCache)
}

/** Helper function to process rule for a language
@@ -380,6 +380,7 @@ object ScanProcessor extends CommandProcessor {
println(s"${Calendar.getInstance().getTime} - Detected language 'Ruby'")
RubyProcessor.createRubyCpg(
getProcessedRule(Set(Language.RUBY)),
+this.config,
sourceRepoLocation,
lang,
dataFlowCache = getDataflowCache,
@@ -40,16 +40,22 @@ import overflowdb.traversal.Traversal
import scala.collection.mutable
import scala.collection.mutable.ListBuffer

-class AndroidPermissionsExporter(cpg: Cpg, ruleCache: RuleCache) {
+class AndroidPermissionsExporter(cpg: Cpg, ruleCache: RuleCache, repoItemTagName: Option[String] = None) {

private val logger = LoggerFactory.getLogger(getClass)

def getPermissions: List[AndroidPermissionModel] = {
val permissions = ListBuffer[AndroidPermissionModel]()
try {
// take only those nodes that have source tags
-cpg.androidXmlPermissionNode
-.where(_.tag.nameExact(Constants.catLevelOne).valueExact(Constants.sources))
+ExporterUtility
+.filterNodeBasedOnRepoItemTagName(
+cpg.androidXmlPermissionNode
+.where(_.tag.nameExact(Constants.catLevelOne).valueExact(Constants.sources))
+.l,
+repoItemTagName
+)
+.collectAll[AndroidXmlPermissionNode]
.foreach(node => {
getPermissionDetail(node) match
case Some(permissionDetail) =>
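ExporterUtility.filterNodeBasedOnRepoItemTagName itself is not part of this diff; judging from the call site, the plausible contract is that Some(tag), set when a monolith repository has been split into repo items, restricts the exported nodes to those carrying that tag, while None passes everything through. A guessed, generic sketch of that contract; the real helper may well differ:

    // Assumed semantics only; tagsOf stands in for reading a node's tags from the CPG.
    def filterByRepoItem[N](nodes: List[N], tagsOf: N => Set[String], repoItemTagName: Option[String]): List[N] =
      repoItemTagName match {
        case Some(tag) => nodes.filter(node => tagsOf(node).contains(tag))
        case None      => nodes
      }

    val tags = Map("PermissionA" -> Set("repoItem1"), "PermissionB" -> Set.empty[String])
    println(filterByRepoItem(List("PermissionA", "PermissionB"), tags, Some("repoItem1"))) // List(PermissionA)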