From 36a131b48e96b845dc0698ddb39ea4cd1e85c71a Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Tue, 12 May 2020 23:02:10 -0400
Subject: [PATCH 1/9] Used scalafix and scalafmt to clean up the code.

---
 .../scala/org/renci/umls/CodeMapper.scala     |  89 ++++---
 .../scala/org/renci/umls/db/DbConcepts.scala  | 247 ++++++++++--------
 .../scala/org/renci/umls/db/DbHierarchy.scala |  38 +--
 .../scala/org/renci/umls/rrf/RRFCols.scala    |  36 ++-
 .../org/renci/umls/rrf/RRFConcepts.scala      |  87 +++---
 .../scala/org/renci/umls/rrf/RRFDir.scala     |   9 +-
 .../scala/org/renci/umls/rrf/RRFFile.scala    |   3 +-
 .../scala/org/renci/umls/rrf/RRFFiles.scala   |   8 +-
 .../org/renci/umls/rrf/RRFHierarchy.scala     |  40 ++-
 9 files changed, 301 insertions(+), 256 deletions(-)

diff --git a/src/main/scala/org/renci/umls/CodeMapper.scala b/src/main/scala/org/renci/umls/CodeMapper.scala
index ca0251f..88f8ff9 100644
--- a/src/main/scala/org/renci/umls/CodeMapper.scala
+++ b/src/main/scala/org/renci/umls/CodeMapper.scala
@@ -38,21 +38,14 @@ object CodeMapper extends App with LazyLogging {
       default = Some(new File("./sqlite.db"))
     )
 
-    val fromSource: ScallopOption[String] = opt[String](
-      descr = "The source to translate from"
-    )
+    val fromSource: ScallopOption[String] = opt[String](descr = "The source to translate from")
 
-    val toSource: ScallopOption[String] = opt[String](
-      descr = "The source to translate to"
-    )
+    val toSource: ScallopOption[String] = opt[String](descr = "The source to translate to")
 
-    val idFile: ScallopOption[File] = opt[File](
-      descr = "A file containing identifiers (in a single, newline-delimited column)"
-    )
+    val idFile: ScallopOption[File] =
+      opt[File](descr = "A file containing identifiers (in a single, newline-delimited column)")
 
-    val outputFile: ScallopOption[File] = opt[File](
-      descr = "Where to write the output file"
-    )
+    val outputFile: ScallopOption[File] = opt[File](descr = "Where to write the output file")
 
     verify()
   }
@@ -83,7 +76,9 @@ object CodeMapper extends App with LazyLogging {
     // Do we need to filter first?
 
     // Get ready to write output!
-    val stream = if (conf.outputFile.isEmpty) System.out else new PrintStream(new FileOutputStream(conf.outputFile()))
+    val stream =
+      if (conf.outputFile.isEmpty) System.out
+      else new PrintStream(new FileOutputStream(conf.outputFile()))
 
     // Both sourceFrom and sourceTo are set!
     if (conf.idFile.isEmpty) {
@@ -92,9 +87,9 @@ object CodeMapper extends App with LazyLogging {
       maps.foreach(map => {
         stream.println(
           s"${map.fromSource}\t${map.fromCode}\t" +
-          s"${map.toSource}\t${map.toCode}\t" +
-          s"${map.conceptIds.mkString(", ")}\t" +
-          s"${map.labels.mkString("|")}"
+            s"${map.toSource}\t${map.toCode}\t" +
+            s"${map.conceptIds.mkString(", ")}\t" +
+            s"${map.labels.mkString("|")}"
         )
       })
     } else {
@@ -105,33 +100,43 @@ object CodeMapper extends App with LazyLogging {
       val map = concepts.getMap(conf.fromSource(), ids, conf.toSource(), Seq.empty)
       val allTermCuis = concepts.getCUIsForCodes(conf.fromSource(), ids)
 
-      stream.println("fromSource\tid\tcuis\tlabels\tcountDirect\tcountViaParent\ttoIds\ttoLabels\tparentCuis\tparentSource\tparentIds\tparentLabels")
+      stream.println(
+        "fromSource\tid\tcuis\tlabels\tcountDirect\tcountViaParent\ttoIds\ttoLabels\tparentCuis\tparentSource\tparentIds\tparentLabels"
+      )
 
       var count = 0
       val mapByFromId = map.groupBy(_.fromCode)
       val matched = ids.map(id => {
         val maps = mapByFromId.getOrElse(id, Seq())
-        val (parentStr, parentHalfMaps) = if (maps.nonEmpty) ("", Seq.empty) else {
-          val termCuis = allTermCuis.getOrElse(id, Seq.empty)
-          // logger.info(s"Checking $termCuis for parent AUI information.")
-
-          val termAtomIds = concepts.getAUIsForCUIs(termCuis)
-          val parentAtomIds = rrfDir.hierarchy.getParents(termAtomIds)
-          val parentCUIs = concepts.getCUIsForAUI(parentAtomIds.toSeq)
-          val halfMaps = if(parentCUIs.isEmpty) Seq.empty else concepts.getMapsByCUIs(parentCUIs.toSeq, conf.toSource())
-
-          val cuis = halfMaps.map(_.cui).toSet
-          val sources = halfMaps.map(_.source).toSet
-          val codes = halfMaps.map(_.code).toSet
-          val labels = halfMaps.map(_.label).toSet
-
-          (s"\t${cuis.mkString("|")}\t${sources.mkString("|")}\t${codes.mkString("|")}\t${labels.mkString("|")}", halfMaps)
-        }
+        val (parentStr, parentHalfMaps) =
+          if (maps.nonEmpty) ("", Seq.empty)
+          else {
+            val termCuis = allTermCuis.getOrElse(id, Seq.empty)
+            // logger.info(s"Checking $termCuis for parent AUI information.")
+
+            val termAtomIds = concepts.getAUIsForCUIs(termCuis)
+            val parentAtomIds = rrfDir.hierarchy.getParents(termAtomIds)
+            val parentCUIs = concepts.getCUIsForAUI(parentAtomIds.toSeq)
+            val halfMaps =
+              if (parentCUIs.isEmpty) Seq.empty
+              else concepts.getMapsByCUIs(parentCUIs.toSeq, conf.toSource())
+
+            val cuis = halfMaps.map(_.cui).toSet
+            val sources = halfMaps.map(_.source).toSet
+            val codes = halfMaps.map(_.code).toSet
+            val labels = halfMaps.map(_.label).toSet
+
+            (s"\t${cuis.mkString("|")}\t${sources.mkString("|")}\t${codes.mkString("|")}\t${labels
+              .mkString("|")}", halfMaps)
+          }
 
         val halfMaps = halfMapByCode.getOrElse(id, Seq())
 
         stream.println(
-          s"${conf.fromSource()}\t$id\t${halfMaps.map(_.cui).toSet.mkString("|")}\t${halfMaps.map(_.label).toSet.mkString("|")}\t${maps.size}\t${parentHalfMaps.size}"
+          s"${conf.fromSource()}\t$id\t${halfMaps.map(_.cui).toSet.mkString("|")}\t${halfMaps
+            .map(_.label)
+            .toSet
+            .mkString("|")}\t${maps.size}\t${parentHalfMaps.size}"
             + s"\t${maps.map(m => m.toSource + ":" + m.toCode).mkString("|")}"
             + s"\t${maps.map(_.labels.mkString(";")).mkString("|")}"
             + s"$parentStr"
@@ -139,7 +144,7 @@ object CodeMapper extends App with LazyLogging {
 
         count += 1
         if (count % 100 == 0) {
-          val percentage = count.toFloat/ids.size * 100
+          val percentage = count.toFloat / ids.size * 100
           logger.info(f"Processed $count out of ${ids.size} IDs ($percentage%.2f%%)")
         }
 
@@ -150,12 +155,16 @@ object CodeMapper extends App with LazyLogging {
       val matchedParent = matched.filter(_._2.nonEmpty).flatMap(_._2)
       val matchedTotal = matched.filter(m => m._1.nonEmpty || m._2.nonEmpty)
 
-      val percentageTerm = (matchedTerm.size.toFloat/ids.size) * 100
-      val percentageParent = (matchedParent.size.toFloat/ids.size) * 100
-      val percentageTotal = (matchedTotal.size.toFloat/ids.size) * 100
+      val percentageTerm = (matchedTerm.size.toFloat / ids.size) * 100
+      val percentageParent = (matchedParent.size.toFloat / ids.size) * 100
+      val percentageTotal = (matchedTotal.size.toFloat / ids.size) * 100
       logger.info(f"Matched ${matchedTerm.size} IDs out of ${ids.size} ($percentageTerm%.2f%%)")
-      logger.info(f"Matched a further ${matchedParent.size} IDs via the parent term ($percentageParent%.2f%%)")
-      logger.info(f"Total coverage: ${matchedTotal.size} IDs out of ${ids.size} ($percentageTotal%.2f%%)")
+      logger.info(
+        f"Matched a further ${matchedParent.size} IDs via the parent term ($percentageParent%.2f%%)"
+      )
+      logger.info(
+        f"Total coverage: ${matchedTotal.size} IDs out of ${ids.size} ($percentageTotal%.2f%%)"
+      )
     }
 
     stream.close()
diff --git a/src/main/scala/org/renci/umls/db/DbConcepts.scala b/src/main/scala/org/renci/umls/db/DbConcepts.scala
index abc6a19..a7be05b 100644
--- a/src/main/scala/org/renci/umls/db/DbConcepts.scala
+++ b/src/main/scala/org/renci/umls/db/DbConcepts.scala
@@ -19,7 +19,9 @@ import scala.collection.mutable
 import scala.io.Source
 
 /** A wrapper for RRFConcepts that uses SQLite */
-class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RRFConcepts(file, filename) with LazyLogging {
+class DbConcepts(db: ConnectionFactory, file: File, filename: String)
+    extends RRFConcepts(file, filename)
+    with LazyLogging {
   implicit val halfMapCache: Cache[Seq[HalfMap]] = CaffeineCache[Seq[HalfMap]]
 
   /** The name of the table used to store this information. We include the SHA-256 hash so we reload it if it changes. */
@@ -63,7 +65,7 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
 
     val insertStmt = conn.prepareStatement(
       s"INSERT INTO $tableName (CUI, LAT, TS, LUI, STT, SUI, ISPREF, AUI, SAUI, SCUI, SDUI, SAB, TTY, CODE, STR, SRL, SUPPRESS, CVF) " +
-      "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
+        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
     )
 
     var count = 0
@@ -77,7 +79,7 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
 
       count += 1
       if (count % 100000 == 0) {
-        val percentage = count.toFloat/rowCount*100
+        val percentage = count.toFloat / rowCount * 100
         logger.info(f"Batched $count rows out of $rowCount ($percentage%.2f%%), executing.")
         insertStmt.executeBatch()
         insertStmt.clearBatch()
@@ -96,10 +98,12 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
   def getSources(): Seq[(String, Int)] = {
     val conn = db.createConnection()
     val query = conn.createStatement()
-    val rs = query.executeQuery(s"SELECT SAB, COUNT(*) AS count FROM $tableName GROUP BY SAB ORDER BY count DESC;")
+    val rs = query.executeQuery(
+      s"SELECT SAB, COUNT(*) AS count FROM $tableName GROUP BY SAB ORDER BY count DESC;"
+    )
 
     var results = Seq[(String, Int)]()
-    while(rs.next()) {
+    while (rs.next()) {
       results = results :+ (
         rs.getString(1),
         rs.getInt(2)
@@ -111,56 +115,22 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
   }
 
   // We use the CUIs to map everything from the fromSource to the toSource.
-  case class HalfMap(cui: String, aui: String, source: String, code:String, label:String)
-
-  def getHalfMapsForCodes(source: String, ids: Seq[String]): Seq[HalfMap] = memoizeSync(Some(2.seconds)) {
-    // Retrieve all the fromIds.
-    val conn = db.createConnection()
-    if (ids.isEmpty) {
-      val query = conn.prepareStatement(s"SELECT CUI, AUI, SAB, CODE, STR FROM $tableName WHERE SAB=?")
-      query.setString(1, source)
-      val rs = query.executeQuery()
-
-      logger.info(s"Loading halfmaps for $source")
-      var halfMap = Seq[HalfMap]()
-      var count = 0
-      while(rs.next()) {
-        halfMap = HalfMap(
-          rs.getString(1),
-          rs.getString(2),
-          rs.getString(3),
-          rs.getString(4),
-          rs.getString(5)
-        ) +: halfMap
-        count += 1
-        if (count % 100000 == 0) {
-          logger.info(s"Loaded $count halfmaps.")
-        }
-      }
-
-      conn.close()
-      logger.info(s"${halfMap.size} halfmaps loaded.")
-
-      halfMap
-    } else {
-      logger.info(s"Loading halfmaps for $source with identifiers: $ids.")
-
-      var halfMap = Seq[HalfMap]()
-      var count = 0
-
-      val windowSize = (ids.size/10) + 1
-      ids.sliding(windowSize, windowSize).foreach(idGroup => {
-        val indexedIds = idGroup.toIndexedSeq
-        val questions = idGroup.map(_ => "?").mkString(", ")
-        val query = conn.prepareStatement(s"SELECT DISTINCT CUI, AUI, SAB, CODE, STR FROM $tableName WHERE SAB=? AND CODE IN ($questions)")
-
+  case class HalfMap(cui: String, aui: String, source: String, code: String, label: String)
+
+  def getHalfMapsForCodes(source: String, ids: Seq[String]): Seq[HalfMap] =
+    memoizeSync(Some(2.seconds)) {
+      // Retrieve all the fromIds.
+      val conn = db.createConnection()
+      if (ids.isEmpty) {
+        val query =
+          conn.prepareStatement(s"SELECT CUI, AUI, SAB, CODE, STR FROM $tableName WHERE SAB=?")
         query.setString(1, source)
-        (0 until idGroup.size).foreach(id => {
-          query.setString(id + 2, indexedIds(id))
-        })
-
         val rs = query.executeQuery()
-        while(rs.next()) {
+
+        logger.info(s"Loading halfmaps for $source")
+        var halfMap = Seq[HalfMap]()
+        var count = 0
+        while (rs.next()) {
           halfMap = HalfMap(
             rs.getString(1),
             rs.getString(2),
@@ -169,17 +139,57 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
             rs.getString(5)
           ) +: halfMap
           count += 1
+          if (count % 100000 == 0) {
+            logger.info(s"Loaded $count halfmaps.")
+          }
         }
 
-        logger.info(s"Loaded $count halfmaps.")
-      })
-
-      conn.close()
-      logger.info(s"${halfMap.size} halfmaps loaded.")
-
-      halfMap
+        conn.close()
+        logger.info(s"${halfMap.size} halfmaps loaded.")
+
+        halfMap
+      } else {
+        logger.info(s"Loading halfmaps for $source with identifiers: $ids.")
+
+        var halfMap = Seq[HalfMap]()
+        var count = 0
+
+        val windowSize = (ids.size / 10) + 1
+        ids
+          .sliding(windowSize, windowSize)
+          .foreach(idGroup => {
+            val indexedIds = idGroup.toIndexedSeq
+            val questions = idGroup.map(_ => "?").mkString(", ")
+            val query = conn.prepareStatement(
+              s"SELECT DISTINCT CUI, AUI, SAB, CODE, STR FROM $tableName WHERE SAB=? AND CODE IN ($questions)"
+            )
+
+            query.setString(1, source)
+            (0 until idGroup.size).foreach(id => {
+              query.setString(id + 2, indexedIds(id))
+            })
+
+            val rs = query.executeQuery()
+            while (rs.next()) {
+              halfMap = HalfMap(
+                rs.getString(1),
+                rs.getString(2),
+                rs.getString(3),
+                rs.getString(4),
+                rs.getString(5)
+              ) +: halfMap
+              count += 1
+            }
+
+            logger.info(s"Loaded $count halfmaps.")
+          })
+
+        conn.close()
+        logger.info(s"${halfMap.size} halfmaps loaded.")
+
+        halfMap
+      }
     }
-  }
 
   case class Mapping(
     fromSource: String,
@@ -190,65 +200,69 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
     atomIds: Set[String],
     labels: Set[String]
   )
-  def getMap(fromSource: String, fromIds: Seq[String], toSource: String, toIds: Seq[String]): Seq[Mapping] = {
+  def getMap(
+    fromSource: String,
+    fromIds: Seq[String],
+    toSource: String,
+    toIds: Seq[String]
+  ): Seq[Mapping] = {
     val fromHalfMaps = getHalfMapsForCodes(fromSource, fromIds)
     val toHalfMaps = getHalfMapsForCodes(toSource, toIds)
 
     // Combine the halfmaps so we need to.
-    (fromHalfMaps ++ toHalfMaps).groupBy(_.cui).values.flatMap({ entries =>
-      // Everything in entries is the "same" concept according to MRCONSO.
-      // So we partition this based on
-      val cuis = entries.map(_.cui).toSet
-      val auis = entries.map(_.aui).toSet
-      val labels = entries.map(_.label).toSet
-      val fromCodes = entries.filter(_.source == fromSource).map(_.code).toSet[String]
-      val toCodes = entries.filter(_.source == toSource).map(_.code).toSet[String]
-
-      fromCodes.flatMap(fromCode => {
-        toCodes.map(toCode => {
-          Mapping(
-            fromSource,
-            fromCode,
-            toSource,
-            toCode,
-            cuis,
-            auis,
-            labels
-          )
+    (fromHalfMaps ++ toHalfMaps)
+      .groupBy(_.cui)
+      .values
+      .flatMap({ entries =>
+        // Everything in entries is the "same" concept according to MRCONSO.
+        // So we partition the codes by whether they come from fromSource or toSource.
+        val cuis = entries.map(_.cui).toSet
+        val auis = entries.map(_.aui).toSet
+        val labels = entries.map(_.label).toSet
+        val fromCodes = entries.filter(_.source == fromSource).map(_.code).toSet[String]
+        val toCodes = entries.filter(_.source == toSource).map(_.code).toSet[String]
+
+        fromCodes.flatMap(fromCode => {
+          toCodes.map(toCode => {
+            Mapping(fromSource, fromCode, toSource, toCode, cuis, auis, labels)
+          })
         })
       })
-    }).toSeq
+      .toSeq
   }
 
   // Look up maps by CUIs.
   // TODO: we might want to be able to call this without source.
-  def getMapsByCUIs(cuis: Seq[String], toSource: String): Seq[HalfMap] = memoizeSync(Some(2.seconds)) {
-    if (cuis.isEmpty) return Seq()
+  def getMapsByCUIs(cuis: Seq[String], toSource: String): Seq[HalfMap] =
+    memoizeSync(Some(2.seconds)) {
+      if (cuis.isEmpty) return Seq()
+
+      val conn = db.createConnection()
+      val questions = cuis.map(_ => "?").mkString(", ")
+      val query = conn.prepareStatement(
+        s"SELECT DISTINCT CUI, AUI, SAB, CODE, STR FROM $tableName WHERE SAB=? AND CUI IN ($questions)"
+      )
+      query.setString(1, toSource)
+      val indexedSeq = cuis.toIndexedSeq
+      (1 to cuis.size).foreach(index => {
+        query.setString(index + 1, indexedSeq(index - 1))
+      })
 
-    val conn = db.createConnection()
-    val questions = cuis.map(_ => "?").mkString(", ")
-    val query = conn.prepareStatement(s"SELECT DISTINCT CUI, AUI, SAB, CODE, STR FROM $tableName WHERE SAB=? AND CUI IN ($questions)")
-    query.setString(1, toSource)
-    val indexedSeq = cuis.toIndexedSeq
-    (1 to cuis.size).foreach(index => {
-      query.setString(index + 1, indexedSeq(index - 1))
-    })
+      var halfMaps = Seq[HalfMap]()
+      val rs = query.executeQuery()
+      while (rs.next()) {
+        halfMaps = HalfMap(
+          rs.getString(1),
+          rs.getString(2),
+          rs.getString(3),
+          rs.getString(4),
+          rs.getString(5)
+        ) +: halfMaps
+      }
+      conn.close()
 
-    var halfMaps = Seq[HalfMap]()
-    val rs = query.executeQuery()
-    while(rs.next()) {
-      halfMaps = HalfMap(
-        rs.getString(1),
-        rs.getString(2),
-        rs.getString(3),
-        rs.getString(4),
-        rs.getString(5)
-      ) +: halfMaps
+      halfMaps
     }
-    conn.close()
-
-    halfMaps
-  }
 
   // Get the CUIs for given AUIs.
   def getCUIsForAUI(auis: Seq[String]): Set[String] = {
@@ -256,7 +270,8 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
 
     val conn = db.createConnection()
     val questions = auis.map(_ => "?").mkString(", ")
-    val query = conn.prepareStatement(s"SELECT DISTINCT CUI FROM $tableName WHERE AUI IN ($questions)")
+    val query =
+      conn.prepareStatement(s"SELECT DISTINCT CUI FROM $tableName WHERE AUI IN ($questions)")
     val indexedSeq = auis.toIndexedSeq
     (1 to auis.size).foreach(index => {
       query.setString(index, indexedSeq(index - 1))
@@ -264,7 +279,7 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
 
     var results = Seq[String]()
     val rs = query.executeQuery()
-    while(rs.next()) {
+    while (rs.next()) {
       results = rs.getString(1) +: results
     }
     conn.close()
@@ -277,7 +292,8 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
 
     val conn = db.createConnection()
     val questions = cuis.map(_ => "?").mkString(", ")
-    val query = conn.prepareStatement(s"SELECT DISTINCT AUI FROM $tableName WHERE CUI IN ($questions)")
+    val query =
+      conn.prepareStatement(s"SELECT DISTINCT AUI FROM $tableName WHERE CUI IN ($questions)")
     val indexedSeq = cuis.toIndexedSeq
     (1 to cuis.size).foreach(index => {
       query.setString(index, indexedSeq(index - 1))
@@ -285,7 +301,7 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
 
     var results = Seq[String]()
     val rs = query.executeQuery()
-    while(rs.next()) {
+    while (rs.next()) {
       results = rs.getString(1) +: results
     }
     conn.close()
@@ -298,7 +314,9 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
 
     val conn = db.createConnection()
     val questions = ids.map(_ => "?").mkString(", ")
-    val query = conn.prepareStatement(s"SELECT DISTINCT CODE, CUI FROM $tableName WHERE SAB=? AND CODE IN ($questions)")
+    val query = conn.prepareStatement(
+      s"SELECT DISTINCT CODE, CUI FROM $tableName WHERE SAB=? AND CODE IN ($questions)"
+    )
     query.setString(1, source)
     val indexedSeq = ids.toIndexedSeq
     (1 to ids.size).foreach(index => {
@@ -307,7 +325,7 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
 
     var results = Seq[(String, String)]()
     val rs = query.executeQuery()
-    while(rs.next()) {
+    while (rs.next()) {
       results = (rs.getString(1), rs.getString(2)) +: results
     }
     conn.close()
@@ -318,5 +336,6 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String) extends RR
 
 object DbConcepts {
   /** Wrap an RRF file using a database to cache results. */
-  def fromDatabase(db: ConnectionFactory, rrfFile: RRFFile) = new DbConcepts(db, rrfFile.file, rrfFile.filename)
+  def fromDatabase(db: ConnectionFactory, rrfFile: RRFFile) =
+    new DbConcepts(db, rrfFile.file, rrfFile.filename)
 }
diff --git a/src/main/scala/org/renci/umls/db/DbHierarchy.scala b/src/main/scala/org/renci/umls/db/DbHierarchy.scala
index e4dcc1c..0e451cd 100644
--- a/src/main/scala/org/renci/umls/db/DbHierarchy.scala
+++ b/src/main/scala/org/renci/umls/db/DbHierarchy.scala
@@ -15,19 +15,21 @@ import scala.io.Source
 
 /** Represents a single hierarchy entry. */
 case class HierarchyEntry(
-                           ConceptId: String,                  // CUI
-                           AtomId: String,                     // AUI
-                           ContextNumber: String,              // CXN
-                           ParentAtomId: String,               // PAUI
-                           Source: String,                     // SAB
-                           Relation: String,                   // RELA
-                           PathToRoot: String,                 // PTR
-                           HierarchyCode: String,              // HCD
-                           ContentViewFlag: String             // CVF
-                         )
+  ConceptId: String, // CUI
+  AtomId: String, // AUI
+  ContextNumber: String, // CXN
+  ParentAtomId: String, // PAUI
+  Source: String, // SAB
+  Relation: String, // RELA
+  PathToRoot: String, // PTR
+  HierarchyCode: String, // HCD
+  ContentViewFlag: String // CVF
+)
 
 /** A wrapper for RRFHierarchy that uses SQLite */
-class DbHierarchy(db: ConnectionFactory, file: File, filename: String) extends RRFHierarchy(file, filename) with LazyLogging {
+class DbHierarchy(db: ConnectionFactory, file: File, filename: String)
+    extends RRFHierarchy(file, filename)
+    with LazyLogging {
   /** The name of the table used to store this information. We include the SHA-256 hash so we reload it if it changes. */
   val tableName: String = "MRHIER_" + sha256
 
@@ -60,7 +62,7 @@ class DbHierarchy(db: ConnectionFactory, file: File, filename: String) extends R
 
     val insertStmt = conn.prepareStatement(
       s"INSERT INTO $tableName (CUI, AUI, CXN, PAUI, SAB, RELA, PTR, HCD, CVF) " +
-      "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
+        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
     )
 
     var count = 0
@@ -74,7 +76,7 @@ class DbHierarchy(db: ConnectionFactory, file: File, filename: String) extends R
 
       count += 1
       if (count % 100000 == 0) {
-        val percentage = count.toFloat/rowCount*100
+        val percentage = count.toFloat / rowCount * 100
         logger.info(f"Batched $count rows out of $rowCount ($percentage%.2f%%), executing.")
         insertStmt.executeBatch()
         insertStmt.clearBatch()
@@ -95,7 +97,8 @@ class DbHierarchy(db: ConnectionFactory, file: File, filename: String) extends R
 
     val conn = db.createConnection()
     val questions = atomIds.map(_ => "?").mkString(", ")
-    val query = conn.prepareStatement(s"SELECT DISTINCT PAUI FROM $tableName WHERE AUI IN ($questions)")
+    val query =
+      conn.prepareStatement(s"SELECT DISTINCT PAUI FROM $tableName WHERE AUI IN ($questions)")
     val indexedSeq = atomIds.toIndexedSeq
     (1 to atomIds.size).foreach(index => {
       query.setString(index, indexedSeq(index - 1))
@@ -103,7 +106,7 @@ class DbHierarchy(db: ConnectionFactory, file: File, filename: String) extends R
 
     var results = Seq[String]()
     val rs = query.executeQuery()
-    while(rs.next()) {
+    while (rs.next()) {
       results = rs.getString(1) +: results
     }
     conn.close()
@@ -114,5 +117,6 @@ class DbHierarchy(db: ConnectionFactory, file: File, filename: String) extends R
 
 object DbHierarchy {
   /** Wrap an RRF file using a database to cache results. */
-  def fromDatabase(db: ConnectionFactory, rrfFile: RRFFile) = new DbHierarchy(db, rrfFile.file, rrfFile.filename)
-}
\ No newline at end of file
+  def fromDatabase(db: ConnectionFactory, rrfFile: RRFFile) =
+    new DbHierarchy(db, rrfFile.file, rrfFile.filename)
+}
diff --git a/src/main/scala/org/renci/umls/rrf/RRFCols.scala b/src/main/scala/org/renci/umls/rrf/RRFCols.scala
index 09874cd..5bc9117 100644
--- a/src/main/scala/org/renci/umls/rrf/RRFCols.scala
+++ b/src/main/scala/org/renci/umls/rrf/RRFCols.scala
@@ -23,26 +23,34 @@ class RRFCols(file: File, filename: String = "MRCOLS.RRF") extends RRFFile(file,
     // We'll just hard-code this for now.
     // Eventually, it'd be nice to have this automatically settable from MRCOLS.RRF itself, but
     // right now I just don't have the time.
-    rows.map(arr => Column(
-      arr(0),
-      arr(1),
-      arr(2),
-      arr(3).trim.toIntOption,
-      arr(4).trim.toFloatOption,
-      arr(5).trim.toIntOption,
-      arr(6),
-      arr(7)
-    ))
+    rows.map(
+      arr =>
+        Column(
+          arr(0),
+          arr(1),
+          arr(2),
+          arr(3).trim.toIntOption,
+          arr(4).trim.toFloatOption,
+          arr(5).trim.toIntOption,
+          arr(6),
+          arr(7)
+        )
+    )
   }
 
   /** Retrieve a column by name. */
-  def getColumn(name: String, filename: String): Seq[Column] = columns.filter(_.Filename == filename).filter(_.Name == name)
+  def getColumn(name: String, filename: String): Seq[Column] =
+    columns.filter(_.Filename == filename).filter(_.Name == name)
   def getOnlyColumn(name: String, filename: String): Column = {
     val results = getColumn(name, filename)
     if (results.size < 1)
-      throw new RuntimeException(s"No column named $name found for filename $filename in ${this.filename}")
+      throw new RuntimeException(
+        s"No column named $name found for filename $filename in ${this.filename}"
+      )
     else if (results.size > 1)
-      throw new RuntimeException(s"Too many columns named $name found for filename $filename in ${this.filename}: $results")
+      throw new RuntimeException(
+        s"Too many columns named $name found for filename $filename in ${this.filename}: $results"
+      )
     else results.head
   }
 }
@@ -50,4 +58,4 @@ class RRFCols(file: File, filename: String = "MRCOLS.RRF") extends RRFFile(file,
 object RRFCols {
   /** Wrap an RRF file as an RRFCols. */
   def fromRRF(rrfFile: RRFFile) = new RRFCols(rrfFile.file, rrfFile.filename)
-}
\ No newline at end of file
+}
diff --git a/src/main/scala/org/renci/umls/rrf/RRFConcepts.scala b/src/main/scala/org/renci/umls/rrf/RRFConcepts.scala
index 026c35e..436281a 100644
--- a/src/main/scala/org/renci/umls/rrf/RRFConcepts.scala
+++ b/src/main/scala/org/renci/umls/rrf/RRFConcepts.scala
@@ -4,24 +4,24 @@ import java.io.File
 
 /** Represents a single column entry. */
 case class Concept(
-  ConceptID: String,            // CUI
-  Lang: String,                 // LAT
-  TermStatus: String,           // TS
-  TermID: String,               // LUI
-  StringType: String,           // STT
-  StringID: String,             // SUI
-  IsPreferred: Boolean,         // ISPREF
-  AtomID: String,               // AUI
-  SourceAtomID: String,         // SAUI
-  SourceConceptID: String,      // SCUI
-  SourceDescriptorID: String,   // SDUI
-  Source: String,               // SAB
-  TermType: String,             // TTY
-  SourceEntryID: String,        // CODE
-  EntryString: String,          // STR
-  SourceRestriction: String,    // SRL
-  SuppressibleFlag: String,     // SUPPRESS
-  ContentViewFlag: String       // CVF
+  ConceptID: String, // CUI
+  Lang: String, // LAT
+  TermStatus: String, // TS
+  TermID: String, // LUI
+  StringType: String, // STT
+  StringID: String, // SUI
+  IsPreferred: Boolean, // ISPREF
+  AtomID: String, // AUI
+  SourceAtomID: String, // SAUI
+  SourceConceptID: String, // SCUI
+  SourceDescriptorID: String, // SDUI
+  Source: String, // SAB
+  TermType: String, // TTY
+  SourceEntryID: String, // CODE
+  EntryString: String, // STR
+  SourceRestriction: String, // SRL
+  SuppressibleFlag: String, // SUPPRESS
+  ContentViewFlag: String // CVF
 )
 
 /**
@@ -33,33 +33,36 @@ class RRFConcepts(file: File, filename: String = "MRCONSO.RRF") extends RRFFile(
     // We'll just hard-code this for now.
     // Eventually, it'd be nice to have this automatically settable from MRFILES.RRF itself, but
     // right now I just don't have the time.
-    rows.map(arr => Concept(
-      arr(0),
-      arr(1),
-      arr(2),
-      arr(3),
-      arr(4),
-      arr(5),
-      arr(6).trim match {
-        case "Y" => true
-        case _ => false
-      },
-      arr(7),
-      arr(8),
-      arr(9),
-      arr(10),
-      arr(12),
-      arr(13),
-      arr(14),
-      arr(15),
-      arr(16),
-      arr(17),
-      arr(18)
-    ))
+    rows.map(
+      arr =>
+        Concept(
+          arr(0),
+          arr(1),
+          arr(2),
+          arr(3),
+          arr(4),
+          arr(5),
+          arr(6).trim match {
+            case "Y" => true
+            case _   => false
+          },
+          arr(7),
+          arr(8),
+          arr(9),
+          arr(10),
+          arr(12),
+          arr(13),
+          arr(14),
+          arr(15),
+          arr(16),
+          arr(17),
+          arr(18)
+        )
+    )
   }
 }
 
 object RRFConcepts {
   /** Wrap an RRF file as an RRFCols. */
   def fromRRF(rrfFile: RRFFile) = new RRFConcepts(rrfFile.file, rrfFile.filename)
-}
\ No newline at end of file
+}
diff --git a/src/main/scala/org/renci/umls/rrf/RRFDir.scala b/src/main/scala/org/renci/umls/rrf/RRFDir.scala
index 063fe47..fb3448f 100644
--- a/src/main/scala/org/renci/umls/rrf/RRFDir.scala
+++ b/src/main/scala/org/renci/umls/rrf/RRFDir.scala
@@ -20,14 +20,19 @@ class RRFDir(dir: File, sqliteDbFile: File) {
   def getFile(filename: String): File = {
     val file = new File(dir, filename)
 
-    if (!file.exists()) throw new RuntimeException(s"Directory ${dir.getCanonicalPath} does not contain expected file $filename.")
+    if (!file.exists())
+      throw new RuntimeException(
+        s"Directory ${dir.getCanonicalPath} does not contain expected file $filename."
+      )
 
     file
   }
   def getRRFFile(filename: String): RRFFile = new RRFFile(getFile(filename), filename)
 
   /** Set up an SQLite database for us to use. */
-  lazy val sqliteDb:DriverManagerConnectionFactory = new DriverManagerConnectionFactory("jdbc:sqlite:" + sqliteDbFile.getPath)
+  lazy val sqliteDb: DriverManagerConnectionFactory = new DriverManagerConnectionFactory(
+    "jdbc:sqlite:" + sqliteDbFile.getPath
+  )
 
   /** Get the release information for this release (from release.dat) */
   lazy val releaseInfo: String = Source.fromFile(getFile("release.dat")).mkString
diff --git a/src/main/scala/org/renci/umls/rrf/RRFFile.scala b/src/main/scala/org/renci/umls/rrf/RRFFile.scala
index 5409cd5..2aa5d65 100644
--- a/src/main/scala/org/renci/umls/rrf/RRFFile.scala
+++ b/src/main/scala/org/renci/umls/rrf/RRFFile.scala
@@ -11,7 +11,8 @@ import org.apache.commons.codec.digest.DigestUtils
   */
 class RRFFile(val file: File, val filename: String) {
   /** A list of all rows in this file. */
-  lazy val rows: Seq[IndexedSeq[String]] = Source.fromFile(file).getLines.map(_.split("\\|").toIndexedSeq).toSeq
+  lazy val rows: Seq[IndexedSeq[String]] =
+    Source.fromFile(file).getLines.map(_.split("\\|").toIndexedSeq).toSeq
 
   /** Count the number of rows in this file. */
   lazy val rowCount: Long = Source.fromFile(file).getLines.size
diff --git a/src/main/scala/org/renci/umls/rrf/RRFFiles.scala b/src/main/scala/org/renci/umls/rrf/RRFFiles.scala
index 03f57e8..4f7db57 100644
--- a/src/main/scala/org/renci/umls/rrf/RRFFiles.scala
+++ b/src/main/scala/org/renci/umls/rrf/RRFFiles.scala
@@ -17,7 +17,8 @@ case class FileEntry(
   * The RRFFiles file contains metadata on all of the files in the RRFDir. This is essential, since this contains a
   * list of all the columns in the file.
   */
-class RRFFiles(file: File, cols: RRFCols, filename: String = "MRFILES.RRF") extends RRFFile(file, filename) {
+class RRFFiles(file: File, cols: RRFCols, filename: String = "MRFILES.RRF")
+    extends RRFFile(file, filename) {
   /** Return a list of all files in an RRFFiles file. */
   def files: Seq[FileEntry] = {
     // We'll just hard-code this for now.
@@ -48,5 +49,6 @@ class RRFFiles(file: File, cols: RRFCols, filename: String = "MRFILES.RRF") exte
 
 object RRFFiles {
   /** Wrap an RRF file as an RRFFiles class. */
-  def fromRRF(rrfFile: RRFFile, rrfCols: RRFCols) = new RRFFiles(rrfFile.file, rrfCols, rrfFile.filename)
-}
\ No newline at end of file
+  def fromRRF(rrfFile: RRFFile, rrfCols: RRFCols) =
+    new RRFFiles(rrfFile.file, rrfCols, rrfFile.filename)
+}
diff --git a/src/main/scala/org/renci/umls/rrf/RRFHierarchy.scala b/src/main/scala/org/renci/umls/rrf/RRFHierarchy.scala
index f6d52d7..9c50168 100644
--- a/src/main/scala/org/renci/umls/rrf/RRFHierarchy.scala
+++ b/src/main/scala/org/renci/umls/rrf/RRFHierarchy.scala
@@ -4,15 +4,15 @@ import java.io.File
 
 /** Represents a single hierarchy entry. */
 case class HierarchyEntry(
-  ConceptId: String,                  // CUI
-  AtomId: String,                     // AUI
-  ContextNumber: String,              // CXN
-  ParentAtomId: String,               // PAUI
-  Source: String,                     // SAB
-  Relation: String,                   // RELA
-  PathToRoot: String,                 // PTR
-  HierarchyCode: String,              // HCD
-  ContentViewFlag: String             // CVF
+  ConceptId: String, // CUI
+  AtomId: String, // AUI
+  ContextNumber: String, // CXN
+  ParentAtomId: String, // PAUI
+  Source: String, // SAB
+  Relation: String, // RELA
+  PathToRoot: String, // PTR
+  HierarchyCode: String, // HCD
+  ContentViewFlag: String // CVF
 )
 
 /**
@@ -24,25 +24,19 @@ class RRFHierarchy(file: File, filename: String = "MRHIER.RRF") extends RRFFile(
     // We'll just hard-code this for now.
     // Eventually, it'd be nice to have this automatically settable from MRCOLS.RRF itself, but
     // right now I just don't have the time.
-    rows.map(arr => HierarchyEntry(
-      arr(0),
-      arr(1),
-      arr(2),
-      arr(3),
-      arr(4),
-      arr(5),
-      arr(6),
-      arr(7),
-      arr(8)
-    ))
+    rows.map(
+      arr => HierarchyEntry(arr(0), arr(1), arr(2), arr(3), arr(4), arr(5), arr(6), arr(7), arr(8))
+    )
   }
   lazy val hierarchiesByAtomId = hierarchies.groupBy(_.AtomId)
 
-  def getParents(atomIds: Seq[String]): Set[String] = atomIds.flatMap(hierarchiesByAtomId.getOrElse(_, Seq())).map(_.ParentAtomId).toSet
+  def getParents(atomIds: Seq[String]): Set[String] =
+    atomIds.flatMap(hierarchiesByAtomId.getOrElse(_, Seq())).map(_.ParentAtomId).toSet
   def getOnlyParent(atomIds: Seq[String]): String = {
     val set = getParents(atomIds)
     if (set.size < 1) throw new RuntimeException(s"No parents found for atom IDs: $atomIds")
-    if (set.size > 1) throw new RuntimeException(s"Too many parents found for atom IDs: $atomIds: $set")
+    if (set.size > 1)
+      throw new RuntimeException(s"Too many parents found for atom IDs: $atomIds: $set")
     set.head
   }
 }
@@ -50,4 +44,4 @@ class RRFHierarchy(file: File, filename: String = "MRHIER.RRF") extends RRFFile(
 object RRFHierarchy {
   /** Wrap an RRF file as an RRFHierarchy. */
   def fromRRF(rrfFile: RRFFile) = new RRFHierarchy(rrfFile.file, rrfFile.filename)
-}
\ No newline at end of file
+}

From 5fce917ec66340858c127e5de1ffd9fac7a7538c Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Tue, 12 May 2020 23:22:10 -0400
Subject: [PATCH 2/9] Replaced logback with scribe.

---
 build.sbt                                     |  2 +-
 src/main/resources/logback.xml                | 11 -------
 .../scala/org/renci/umls/CodeMapper.scala     | 33 +++++++++----------
 .../scala/org/renci/umls/db/DbConcepts.scala  | 22 ++++++-------
 .../scala/org/renci/umls/db/DbHierarchy.scala | 10 +++---
 5 files changed, 31 insertions(+), 47 deletions(-)
 delete mode 100644 src/main/resources/logback.xml

diff --git a/build.sbt b/build.sbt
index e00421b..ec00f07 100644
--- a/build.sbt
+++ b/build.sbt
@@ -35,7 +35,7 @@ libraryDependencies ++= {
   Seq(
     // Logging
     "com.typesafe.scala-logging"  %% "scala-logging"          % "3.9.2",
-    "ch.qos.logback"              %  "logback-classic"        % "1.2.3",
+    "com.outr"                    %% "scribe"                 % "2.7.12",
 
     // Command line argument parsing.
     "org.rogach"                  %% "scallop"                % "3.3.2",
diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml
deleted file mode 100644
index 961d6ab..0000000
--- a/src/main/resources/logback.xml
+++ /dev/null
@@ -1,11 +0,0 @@
-<configuration>
-  <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
-    <encoder>
-      <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
-    </encoder>
-  </appender>
-
-  <root level="info">
-    <appender-ref ref="STDOUT" />
-  </root>
-</configuration>
diff --git a/src/main/scala/org/renci/umls/CodeMapper.scala b/src/main/scala/org/renci/umls/CodeMapper.scala
index 88f8ff9..57fed17 100644
--- a/src/main/scala/org/renci/umls/CodeMapper.scala
+++ b/src/main/scala/org/renci/umls/CodeMapper.scala
@@ -4,7 +4,6 @@ import java.io.{File, FileOutputStream, PrintStream}
 
 import org.rogach.scallop._
 import org.rogach.scallop.exceptions._
-import com.typesafe.scalalogging.{LazyLogging, Logger}
 import org.renci.umls.rrf.RRFDir
 
 import scala.io.Source
@@ -12,15 +11,15 @@ import scala.io.Source
 /**
   * Map terms from one code system to another.
   */
-object CodeMapper extends App with LazyLogging {
+object CodeMapper extends App {
   /**
     * Command line configuration for CodeMapper.
     */
-  class Conf(arguments: Seq[String], logger: Logger) extends ScallopConf(arguments) {
+  class Conf(arguments: Seq[String]) extends ScallopConf(arguments) {
     override def onError(e: Throwable): Unit = e match {
       case ScallopException(message) =>
         printHelp
-        logger.error(message)
+        scribe.error(message)
         System.exit(1)
       case ex => super.onError(ex)
     }
@@ -51,27 +50,27 @@ object CodeMapper extends App with LazyLogging {
   }
 
   // Parse command line arguments.
-  val conf = new Conf(args.toIndexedSeq, logger)
+  val conf = new Conf(args.toIndexedSeq)
 
   // Read RRF directory.
   val rrfDir = new RRFDir(conf.rrfDir(), conf.sqliteDb())
-  logger.info(s"Loaded directory for release: ${rrfDir.releaseInfo}")
-  logger.info(s"Using SQLite backend: ${rrfDir.sqliteDb}")
+  scribe.info(s"Loaded directory for release: ${rrfDir.releaseInfo}")
+  scribe.info(s"Using SQLite backend: ${rrfDir.sqliteDb}")
 
   val concepts = rrfDir.concepts
   val sources = concepts.getSources
 
   if (conf.fromSource.isEmpty && conf.toSource.isEmpty) {
-    logger.info("Sources:")
+    scribe.info("Sources:")
     sources.map(entry => {
-      logger.info(s" - ${entry._1} (${entry._2} entries)")
+      scribe.info(s" - ${entry._1} (${entry._2} entries)")
     })
   } else if (conf.fromSource.isEmpty) {
     // We know sourceTo is set.
-    logger.error(s"--source-from is empty, although --source-to is set to '${conf.toSource()}'")
+    scribe.error(s"--source-from is empty, although --source-to is set to '${conf.toSource()}'")
   } else if (conf.toSource.isEmpty) {
     // We know sourceFrom is set.
-    logger.error(s"--source-to is empty, although --source-from is set to '${conf.fromSource()}'")
+    scribe.error(s"--source-to is empty, although --source-from is set to '${conf.fromSource()}'")
   } else {
     // Do we need to filter first?
 
@@ -94,7 +93,7 @@ object CodeMapper extends App with LazyLogging {
       })
     } else {
       val ids = Source.fromFile(conf.idFile()).getLines.map(_.trim).toSeq
-      logger.info(s"Filtering to ${ids.size} IDs from ${conf.idFile()}.")
+      scribe.info(s"Filtering to ${ids.size} IDs from ${conf.idFile()}.")
 
       val halfMapByCode = concepts.getHalfMapsForCodes(conf.fromSource(), ids).groupBy(_.code)
       val map = concepts.getMap(conf.fromSource(), ids, conf.toSource(), Seq.empty)
@@ -112,7 +111,7 @@ object CodeMapper extends App with LazyLogging {
           if (maps.nonEmpty) ("", Seq.empty)
           else {
             val termCuis = allTermCuis.getOrElse(id, Seq.empty)
-            // logger.info(s"Checking $termCuis for parent AUI information.")
+            // scribe.info(s"Checking $termCuis for parent AUI information.")
 
             val termAtomIds = concepts.getAUIsForCUIs(termCuis)
             val parentAtomIds = rrfDir.hierarchy.getParents(termAtomIds)
@@ -145,7 +144,7 @@ object CodeMapper extends App with LazyLogging {
         count += 1
         if (count % 100 == 0) {
           val percentage = count.toFloat / ids.size * 100
-          logger.info(f"Processed $count out of ${ids.size} IDs ($percentage%.2f%%)")
+          scribe.info(f"Processed $count out of ${ids.size} IDs ($percentage%.2f%%)")
         }
 
         (maps, parentHalfMaps)
@@ -158,11 +157,11 @@ object CodeMapper extends App with LazyLogging {
       val percentageTerm = (matchedTerm.size.toFloat / ids.size) * 100
       val percentageParent = (matchedParent.size.toFloat / ids.size) * 100
       val percentageTotal = (matchedTotal.size.toFloat / ids.size) * 100
-      logger.info(f"Matched ${matchedTerm.size} IDs out of ${ids.size} ($percentageTerm%.2f%%)")
-      logger.info(
+      scribe.info(f"Matched ${matchedTerm.size} IDs out of ${ids.size} ($percentageTerm%.2f%%)")
+      scribe.info(
         f"Matched a further ${matchedParent.size} IDs via the parent term ($percentageParent%.2f%%)"
       )
-      logger.info(
+      scribe.info(
         f"Total coverage: ${matchedTotal.size} IDs out of ${ids.size} ($percentageTotal%.2f%%)"
       )
     }
diff --git a/src/main/scala/org/renci/umls/db/DbConcepts.scala b/src/main/scala/org/renci/umls/db/DbConcepts.scala
index a7be05b..4c2b81e 100644
--- a/src/main/scala/org/renci/umls/db/DbConcepts.scala
+++ b/src/main/scala/org/renci/umls/db/DbConcepts.scala
@@ -3,7 +3,6 @@ package org.renci.umls.db
 import java.io.File
 import java.sql.{Connection, PreparedStatement}
 
-import com.typesafe.scalalogging.{LazyLogging, Logger}
 import org.apache.commons.dbcp2.ConnectionFactory
 import org.renci.umls.rrf
 
@@ -20,8 +19,7 @@ import scala.io.Source
 
 /** A wrapper for RRFConcepts that uses SQLite */
 class DbConcepts(db: ConnectionFactory, file: File, filename: String)
-    extends RRFConcepts(file, filename)
-    with LazyLogging {
+    extends RRFConcepts(file, filename) {
   implicit val halfMapCache: Cache[Seq[HalfMap]] = CaffeineCache[Seq[HalfMap]]
 
   /** The name of the table used to store this information. We include the SHA-256 hash so we reload it if it changes. */
@@ -35,9 +33,9 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String)
   conn1.close()
 
   if (rowsFromDb > 0 && rowsFromDb == rowCount) {
-    logger.info(s"Concept table $tableName has $rowsFromDb rows.")
+    scribe.info(s"Concept table $tableName has $rowsFromDb rows.")
   } else {
-    logger.info(s"Concept table $tableName is not present or is out of sync. Regenerating.")
+    scribe.info(s"Concept table $tableName is not present or is out of sync. Regenerating.")
 
     val conn = db.createConnection()
     val regenerate = conn.createStatement()
@@ -80,7 +78,7 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String)
       count += 1
       if (count % 100000 == 0) {
         val percentage = count.toFloat / rowCount * 100
-        logger.info(f"Batched $count rows out of $rowCount ($percentage%.2f%%), executing.")
+        scribe.info(f"Batched $count rows out of $rowCount ($percentage%.2f%%), executing.")
         insertStmt.executeBatch()
         insertStmt.clearBatch()
       }
@@ -127,7 +125,7 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String)
         query.setString(1, source)
         val rs = query.executeQuery()
 
-        logger.info(s"Loading halfmaps for $source")
+        scribe.info(s"Loading halfmaps for $source")
         var halfMap = Seq[HalfMap]()
         var count = 0
         while (rs.next()) {
@@ -140,16 +138,16 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String)
           ) +: halfMap
           count += 1
           if (count % 100000 == 0) {
-            logger.info(s"Loaded $count halfmaps.")
+            scribe.info(s"Loaded $count halfmaps.")
           }
         }
 
         conn.close()
-        logger.info(s"${halfMap.size} halfmaps loaded.")
+        scribe.info(s"${halfMap.size} halfmaps loaded.")
 
         halfMap
       } else {
-        logger.info(s"Loading halfmaps for $source with identifiers: $ids.")
+        scribe.info(s"Loading halfmaps for $source with identifiers: $ids.")
 
         var halfMap = Seq[HalfMap]()
         var count = 0
@@ -181,11 +179,11 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String)
               count += 1
             }
 
-            logger.info(s"Loaded $count halfmaps.")
+            scribe.info(s"Loaded $count halfmaps.")
           })
 
         conn.close()
-        logger.info(s"${halfMap.size} halfmaps loaded.")
+        scribe.info(s"${halfMap.size} halfmaps loaded.")
 
         halfMap
       }
diff --git a/src/main/scala/org/renci/umls/db/DbHierarchy.scala b/src/main/scala/org/renci/umls/db/DbHierarchy.scala
index 0e451cd..045d7ba 100644
--- a/src/main/scala/org/renci/umls/db/DbHierarchy.scala
+++ b/src/main/scala/org/renci/umls/db/DbHierarchy.scala
@@ -3,7 +3,6 @@ package org.renci.umls.db
 import java.io.File
 import java.sql.{Connection, PreparedStatement}
 
-import com.typesafe.scalalogging.{LazyLogging, Logger}
 import org.apache.commons.dbcp2.ConnectionFactory
 import org.renci.umls.rrf
 
@@ -28,8 +27,7 @@ case class HierarchyEntry(
 
 /** A wrapper for RRFHierarchy that uses SQLite */
 class DbHierarchy(db: ConnectionFactory, file: File, filename: String)
-    extends RRFHierarchy(file, filename)
-    with LazyLogging {
+    extends RRFHierarchy(file, filename) {
   /** The name of the table used to store this information. We include the SHA-256 hash so we reload it if it changes. */
   val tableName: String = "MRHIER_" + sha256
 
@@ -41,9 +39,9 @@ class DbHierarchy(db: ConnectionFactory, file: File, filename: String)
   conn1.close()
 
   if (rowsFromDb > 0 && rowsFromDb == rowCount) {
-    logger.info(s"Hierarchy table $tableName has $rowsFromDb rows.")
+    scribe.info(s"Hierarchy table $tableName has $rowsFromDb rows.")
   } else {
-    logger.info(s"Hierarchy table $tableName is not present or is out of sync. Regenerating.")
+    scribe.info(s"Hierarchy table $tableName is not present or is out of sync. Regenerating.")
 
     val conn = db.createConnection()
     val regenerate = conn.createStatement()
@@ -77,7 +75,7 @@ class DbHierarchy(db: ConnectionFactory, file: File, filename: String)
       count += 1
       if (count % 100000 == 0) {
         val percentage = count.toFloat / rowCount * 100
-        logger.info(f"Batched $count rows out of $rowCount ($percentage%.2f%%), executing.")
+        scribe.info(f"Batched $count rows out of $rowCount ($percentage%.2f%%), executing.")
         insertStmt.executeBatch()
         insertStmt.clearBatch()
       }

From 24d859a8801011cd93a24153015b8caad6221ce5 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Tue, 12 May 2020 23:25:04 -0400
Subject: [PATCH 3/9] Removed ExplicitResultTypes, which does not work in Scala
 2.13.

---
 .scalafix.conf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.scalafix.conf b/.scalafix.conf
index 5eff99f..436ec6c 100644
--- a/.scalafix.conf
+++ b/.scalafix.conf
@@ -1,5 +1,4 @@
 rules = [
-    ExplicitResultTypes,
     NoAutoTupling,
     RemoveUnused,
     DisableSyntax,

From 89053673cfe1e2abb01c7045ef01d15af9938218 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Tue, 12 May 2020 23:25:47 -0400
Subject: [PATCH 4/9] Cleaned up code with Scalafix and Scalafmt.

---
 src/main/scala/org/renci/umls/db/DbConcepts.scala  | 3 ---
 src/main/scala/org/renci/umls/db/DbHierarchy.scala | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/src/main/scala/org/renci/umls/db/DbConcepts.scala b/src/main/scala/org/renci/umls/db/DbConcepts.scala
index 4c2b81e..e786c05 100644
--- a/src/main/scala/org/renci/umls/db/DbConcepts.scala
+++ b/src/main/scala/org/renci/umls/db/DbConcepts.scala
@@ -1,10 +1,8 @@
 package org.renci.umls.db
 
 import java.io.File
-import java.sql.{Connection, PreparedStatement}
 
 import org.apache.commons.dbcp2.ConnectionFactory
-import org.renci.umls.rrf
 
 import scala.util.Try
 import org.renci.umls.rrf._
@@ -14,7 +12,6 @@ import scalacache.memoization._
 import scalacache.modes.sync._
 
 import scala.concurrent.duration._
-import scala.collection.mutable
 import scala.io.Source
 
 /** A wrapper for RRFConcepts that uses SQLite */
diff --git a/src/main/scala/org/renci/umls/db/DbHierarchy.scala b/src/main/scala/org/renci/umls/db/DbHierarchy.scala
index 045d7ba..1bf76a4 100644
--- a/src/main/scala/org/renci/umls/db/DbHierarchy.scala
+++ b/src/main/scala/org/renci/umls/db/DbHierarchy.scala
@@ -1,15 +1,12 @@
 package org.renci.umls.db
 
 import java.io.File
-import java.sql.{Connection, PreparedStatement}
 
 import org.apache.commons.dbcp2.ConnectionFactory
-import org.renci.umls.rrf
 
 import scala.util.Try
 import org.renci.umls.rrf._
 
-import scala.collection.mutable
 import scala.io.Source
 
 /** Represents a single hierarchy entry. */

From 10422435fa56e3edc58f01fbcb59fd1fea512648 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Wed, 22 Jul 2020 16:32:02 -0400
Subject: [PATCH 5/9] Create sbt-test.yml

Test code with `sbt test` and code style with Scalafmt.
---
 .github/workflows/sbt-test.yml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 .github/workflows/sbt-test.yml

diff --git a/.github/workflows/sbt-test.yml b/.github/workflows/sbt-test.yml
new file mode 100644
index 0000000..d814314
--- /dev/null
+++ b/.github/workflows/sbt-test.yml
@@ -0,0 +1,22 @@
+name: Scala CI
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up JDK 1.8
+      uses: actions/setup-java@v1
+      with:
+        java-version: 1.8
+    - name: Run tests
+      run: sbt test
+    - name: Check code style with Scalafmt
+      uses: openlawteam/scalafmt-ci@v2.0.2

From a32901c79eac39c06a5c2b3d4e142ec2a9d8eec7 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Wed, 22 Jul 2020 16:49:55 -0400
Subject: [PATCH 6/9] Updated .github scalafmt with `sbt scalafmtCheckAll`.

Also upgraded scalafmt to latest version.
---
 .github/workflows/sbt-test.yml | 2 +-
 .scalafmt.conf                 | 2 +-
 project/plugins.sbt            | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/sbt-test.yml b/.github/workflows/sbt-test.yml
index d814314..d8eeacb 100644
--- a/.github/workflows/sbt-test.yml
+++ b/.github/workflows/sbt-test.yml
@@ -19,4 +19,4 @@ jobs:
     - name: Run tests
       run: sbt test
     - name: Check code style with Scalafmt
-      uses: openlawteam/scalafmt-ci@v2.0.2
+      run: sbt 
diff --git a/.scalafmt.conf b/.scalafmt.conf
index 8b6b438..e0a952c 100644
--- a/.scalafmt.conf
+++ b/.scalafmt.conf
@@ -1,4 +1,4 @@
-version = "2.2.2"
+version = "2.6.4"
 style = IntelliJ
 maxColumn = 100
 align = some
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 38163b8..186dd6c 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -1,4 +1,4 @@
 // Code formatting and linting tools.
-addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.0.1")
+addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.0")
 addSbtPlugin("org.wartremover" % "sbt-wartremover" % "2.4.3")
 addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.11")

From 791164ad98bef9667beaeb40759c86c00695edeb Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Wed, 22 Jul 2020 16:53:23 -0400
Subject: [PATCH 7/9] Tweaked scalafmt settings and restyled code.

---
 .scalafmt.conf                                |  4 ++--
 .../scala/org/renci/umls/CodeMapper.scala     | 23 +++++++++++--------
 .../scala/org/renci/umls/db/DbConcepts.scala  |  1 +
 .../scala/org/renci/umls/db/DbHierarchy.scala |  2 ++
 .../scala/org/renci/umls/rrf/RRFCols.scala    |  2 ++
 .../org/renci/umls/rrf/RRFConcepts.scala      |  2 ++
 .../scala/org/renci/umls/rrf/RRFFile.scala    |  1 +
 .../scala/org/renci/umls/rrf/RRFFiles.scala   |  2 ++
 .../org/renci/umls/rrf/RRFHierarchy.scala     |  2 ++
 9 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/.scalafmt.conf b/.scalafmt.conf
index e0a952c..92d444a 100644
--- a/.scalafmt.conf
+++ b/.scalafmt.conf
@@ -1,4 +1,4 @@
 version = "2.6.4"
-style = IntelliJ
+preset = IntelliJ
+align.preset = some
 maxColumn = 100
-align = some
diff --git a/src/main/scala/org/renci/umls/CodeMapper.scala b/src/main/scala/org/renci/umls/CodeMapper.scala
index 57fed17..06f9a58 100644
--- a/src/main/scala/org/renci/umls/CodeMapper.scala
+++ b/src/main/scala/org/renci/umls/CodeMapper.scala
@@ -12,17 +12,19 @@ import scala.io.Source
   * Map terms from one code system to another.
   */
 object CodeMapper extends App {
+
   /**
     * Command line configuration for CodeMapper.
     */
   class Conf(arguments: Seq[String]) extends ScallopConf(arguments) {
-    override def onError(e: Throwable): Unit = e match {
-      case ScallopException(message) =>
-        printHelp
-        scribe.error(message)
-        System.exit(1)
-      case ex => super.onError(ex)
-    }
+    override def onError(e: Throwable): Unit =
+      e match {
+        case ScallopException(message) =>
+          printHelp
+          scribe.error(message)
+          System.exit(1)
+        case ex => super.onError(ex)
+      }
 
     val version = getClass.getPackage.getImplementationVersion
     version(s"CodeMapper: map from one source to another (v$version)")
@@ -125,8 +127,11 @@ object CodeMapper extends App {
             val codes = halfMaps.map(_.code).toSet
             val labels = halfMaps.map(_.label).toSet
 
-            (s"\t${cuis.mkString("|")}\t${sources.mkString("|")}\t${codes.mkString("|")}\t${labels
-              .mkString("|")}", halfMaps)
+            (
+              s"\t${cuis.mkString("|")}\t${sources.mkString("|")}\t${codes.mkString("|")}\t${labels
+                .mkString("|")}",
+              halfMaps
+            )
           }
 
         val halfMaps = halfMapByCode.getOrElse(id, Seq())
diff --git a/src/main/scala/org/renci/umls/db/DbConcepts.scala b/src/main/scala/org/renci/umls/db/DbConcepts.scala
index e786c05..57ae484 100644
--- a/src/main/scala/org/renci/umls/db/DbConcepts.scala
+++ b/src/main/scala/org/renci/umls/db/DbConcepts.scala
@@ -330,6 +330,7 @@ class DbConcepts(db: ConnectionFactory, file: File, filename: String)
 }
 
 object DbConcepts {
+
   /** Wrap an RRF file using a database to cache results. */
   def fromDatabase(db: ConnectionFactory, rrfFile: RRFFile) =
     new DbConcepts(db, rrfFile.file, rrfFile.filename)
diff --git a/src/main/scala/org/renci/umls/db/DbHierarchy.scala b/src/main/scala/org/renci/umls/db/DbHierarchy.scala
index 1bf76a4..1db3a4a 100644
--- a/src/main/scala/org/renci/umls/db/DbHierarchy.scala
+++ b/src/main/scala/org/renci/umls/db/DbHierarchy.scala
@@ -25,6 +25,7 @@ case class HierarchyEntry(
 /** A wrapper for RRFHierarchy that uses SQLite */
 class DbHierarchy(db: ConnectionFactory, file: File, filename: String)
     extends RRFHierarchy(file, filename) {
+
   /** The name of the table used to store this information. We include the SHA-256 hash so we reload it if it changes. */
   val tableName: String = "MRHIER_" + sha256
 
@@ -111,6 +112,7 @@ class DbHierarchy(db: ConnectionFactory, file: File, filename: String)
 }
 
 object DbHierarchy {
+
   /** Wrap an RRF file using a database to cache results. */
   def fromDatabase(db: ConnectionFactory, rrfFile: RRFFile) =
     new DbHierarchy(db, rrfFile.file, rrfFile.filename)
diff --git a/src/main/scala/org/renci/umls/rrf/RRFCols.scala b/src/main/scala/org/renci/umls/rrf/RRFCols.scala
index 5bc9117..556af5b 100644
--- a/src/main/scala/org/renci/umls/rrf/RRFCols.scala
+++ b/src/main/scala/org/renci/umls/rrf/RRFCols.scala
@@ -18,6 +18,7 @@ case class Column(
   * The RRFCols file contains metadata on all of the columns across all files in the RRFDir.
   */
 class RRFCols(file: File, filename: String = "MRCOLS.RRF") extends RRFFile(file, filename) {
+
   /** A list of all columns in an RRFCols file. */
   val columns: Seq[Column] = {
     // We'll just hard-code this for now.
@@ -56,6 +57,7 @@ class RRFCols(file: File, filename: String = "MRCOLS.RRF") extends RRFFile(file,
 }
 
 object RRFCols {
+
   /** Wrap an RRF file as an RRFCols. */
   def fromRRF(rrfFile: RRFFile) = new RRFCols(rrfFile.file, rrfFile.filename)
 }
diff --git a/src/main/scala/org/renci/umls/rrf/RRFConcepts.scala b/src/main/scala/org/renci/umls/rrf/RRFConcepts.scala
index 436281a..44c95f1 100644
--- a/src/main/scala/org/renci/umls/rrf/RRFConcepts.scala
+++ b/src/main/scala/org/renci/umls/rrf/RRFConcepts.scala
@@ -28,6 +28,7 @@ case class Concept(
   * The RRFConcepts file allows you to read concept data from MRCONSO.RRF.
   */
 class RRFConcepts(file: File, filename: String = "MRCONSO.RRF") extends RRFFile(file, filename) {
+
   /** A list of all columns in an RRFConcepts file. */
   def concepts(): Seq[Concept] = {
     // We'll just hard-code this for now.
@@ -63,6 +64,7 @@ class RRFConcepts(file: File, filename: String = "MRCONSO.RRF") extends RRFFile(
 }
 
 object RRFConcepts {
+
   /** Wrap an RRF file as an RRFCols. */
   def fromRRF(rrfFile: RRFFile) = new RRFConcepts(rrfFile.file, rrfFile.filename)
 }
diff --git a/src/main/scala/org/renci/umls/rrf/RRFFile.scala b/src/main/scala/org/renci/umls/rrf/RRFFile.scala
index 2aa5d65..f5cb081 100644
--- a/src/main/scala/org/renci/umls/rrf/RRFFile.scala
+++ b/src/main/scala/org/renci/umls/rrf/RRFFile.scala
@@ -10,6 +10,7 @@ import org.apache.commons.codec.digest.DigestUtils
   * Wraps a single RRF file.
   */
 class RRFFile(val file: File, val filename: String) {
+
   /** A list of all rows in this file. */
   lazy val rows: Seq[IndexedSeq[String]] =
     Source.fromFile(file).getLines.map(_.split("\\|").toIndexedSeq).toSeq
diff --git a/src/main/scala/org/renci/umls/rrf/RRFFiles.scala b/src/main/scala/org/renci/umls/rrf/RRFFiles.scala
index 4f7db57..3a06457 100644
--- a/src/main/scala/org/renci/umls/rrf/RRFFiles.scala
+++ b/src/main/scala/org/renci/umls/rrf/RRFFiles.scala
@@ -19,6 +19,7 @@ case class FileEntry(
   */
 class RRFFiles(file: File, cols: RRFCols, filename: String = "MRFILES.RRF")
     extends RRFFile(file, filename) {
+
   /** Return a list of all files in an RRFFiles file. */
   def files: Seq[FileEntry] = {
     // We'll just hard-code this for now.
@@ -48,6 +49,7 @@ class RRFFiles(file: File, cols: RRFCols, filename: String = "MRFILES.RRF")
 }
 
 object RRFFiles {
+
   /** Wrap an RRF file as an RRFFiles class. */
   def fromRRF(rrfFile: RRFFile, rrfCols: RRFCols) =
     new RRFFiles(rrfFile.file, rrfCols, rrfFile.filename)
diff --git a/src/main/scala/org/renci/umls/rrf/RRFHierarchy.scala b/src/main/scala/org/renci/umls/rrf/RRFHierarchy.scala
index 9c50168..c4a37cd 100644
--- a/src/main/scala/org/renci/umls/rrf/RRFHierarchy.scala
+++ b/src/main/scala/org/renci/umls/rrf/RRFHierarchy.scala
@@ -19,6 +19,7 @@ case class HierarchyEntry(
   * The RRFHierarchy file contains hierarchy information on atoms in the system.
   */
 class RRFHierarchy(file: File, filename: String = "MRHIER.RRF") extends RRFFile(file, filename) {
+
   /** A list of all columns in an RRFCols file. */
   lazy val hierarchies: Seq[HierarchyEntry] = {
     // We'll just hard-code this for now.
@@ -42,6 +43,7 @@ class RRFHierarchy(file: File, filename: String = "MRHIER.RRF") extends RRFFile(
 }
 
 object RRFHierarchy {
+
   /** Wrap an RRF file as an RRFHierarchy. */
   def fromRRF(rrfFile: RRFFile) = new RRFHierarchy(rrfFile.file, rrfFile.filename)
 }

From 5edccf463d7d28d869fe0c2df44c29ad68593190 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Wed, 22 Jul 2020 17:24:42 -0400
Subject: [PATCH 8/9] Fixed incomplete line in sbt-test.

---
 .github/workflows/sbt-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/sbt-test.yml b/.github/workflows/sbt-test.yml
index d8eeacb..a4af28c 100644
--- a/.github/workflows/sbt-test.yml
+++ b/.github/workflows/sbt-test.yml
@@ -19,4 +19,4 @@ jobs:
     - name: Run tests
       run: sbt test
     - name: Check code style with Scalafmt
-      run: sbt 
+      run: sbt scalafmtCheckAll

From 88e1edd2d2f30d450e4a19a0e0532d1289b91c0c Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Wed, 15 Apr 2020 13:59:24 -0400
Subject: [PATCH 9/9] Added code for loading MRMAP into SQLite.

---
 .../scala/org/renci/umls/db/DbMappings.scala  | 106 ++++++++++++++++++
 .../scala/org/renci/umls/rrf/RRFDir.scala     |   5 +-
 .../org/renci/umls/rrf/RRFMappings.scala      |  78 +++++++++++++
 3 files changed, 188 insertions(+), 1 deletion(-)
 create mode 100644 src/main/scala/org/renci/umls/db/DbMappings.scala
 create mode 100644 src/main/scala/org/renci/umls/rrf/RRFMappings.scala

diff --git a/src/main/scala/org/renci/umls/db/DbMappings.scala b/src/main/scala/org/renci/umls/db/DbMappings.scala
new file mode 100644
index 0000000..2258201
--- /dev/null
+++ b/src/main/scala/org/renci/umls/db/DbMappings.scala
@@ -0,0 +1,106 @@
+package org.renci.umls.db
+
+import java.io.File
+import java.sql.{Connection, PreparedStatement}
+
+import com.typesafe.scalalogging.{LazyLogging, Logger}
+import org.apache.commons.dbcp2.ConnectionFactory
+import org.renci.umls.rrf
+
+import scala.util.Try
+import org.renci.umls.rrf._
+import scalacache._
+import scalacache.caffeine._
+import scalacache.memoization._
+import scalacache.modes.sync._
+
+import scala.concurrent.duration._
+import scala.collection.mutable
+import scala.io.Source
+
+/** A wrapper for RRFMappings that keeps a copy of MRMAP.RRF in a database table, (re)loaded on construction. */
+class DbMappings(db: ConnectionFactory, file: File, filename: String) extends RRFMappings(file, filename) with LazyLogging {
+  /** The name of the table used to store this information. We include the SHA-256 hash so we reload it if it changes. */
+  val tableName: String = "MRMAP_" + sha256
+
+  /* Check to see if the MRMAP_ table seems up to date. If not, load it into the database from the file. */
+  val conn1 = db.createConnection()
+  val checkCount = conn1.createStatement()
+  val results = Try { checkCount.executeQuery(s"SELECT COUNT(*) AS cnt FROM $tableName") }
+  // A JDBC ResultSet starts before its first row, so advance it before reading; -1 means "no usable table".
+  val rowsFromDb = results.map(rs => if (rs.next()) rs.getInt(1) else -1).getOrElse(-1)
+  conn1.close()
+
+  if (rowsFromDb > 0 && rowsFromDb == rowCount) {
+    logger.info(s"Mappings table $tableName has $rowsFromDb rows.")
+  } else {
+    logger.info(s"Mappings table $tableName is not present or is out of sync. Regenerating.")
+
+    val conn = db.createConnection()
+    try {
+      val regenerate = conn.createStatement()
+      regenerate.execute(s"DROP TABLE IF EXISTS $tableName")
+      regenerate.execute(s"""CREATE TABLE $tableName (
+      |MAPSETCUI TEXT,
+      |MAPSETSAB TEXT,
+      |MAPSUBSETID TEXT,
+      |MAPRANK TEXT,
+      |MAPID TEXT,
+      |MAPSID TEXT,
+      |FROMID	TEXT,
+      |FROMSID TEXT,
+      |FROMEXPR	TEXT,
+      |FROMTYPE	TEXT,
+      |FROMRULE	TEXT,
+      |FROMRES TEXT,
+      |REL TEXT,
+      |RELA TEXT,
+      |TOID TEXT,
+      |TOSID TEXT,
+      |TOEXPR	TEXT,
+      |TOTYPE	TEXT,
+      |TORULE	TEXT,
+      |TORES TEXT,
+      |MAPRULE TEXT,
+      |MAPRES TEXT,
+      |MAPTYPE TEXT,
+      |MAPATN	TEXT,
+      |MAPATV	TEXT,
+      |CVF TEXT
+      )""".stripMargin)
+
+      val insertStmt = conn.prepareStatement(
+        s"INSERT INTO $tableName (MAPSETCUI, MAPSETSAB, MAPSUBSETID, MAPRANK, MAPID, MAPSID, FROMID, FROMSID, FROMEXPR, FROMTYPE, FROMRULE, FROMRES, REL, RELA, TOID, TOSID, TOEXPR, TOTYPE, TORULE, TORES, MAPRULE, MAPRES, MAPTYPE, MAPATN, MAPATV, CVF) " +
+        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
+      )
+
+      var count = 0
+      val source = Source.fromFile(file)
+      try source.getLines().map(_.split("\\|", -1).toIndexedSeq).foreach { row =>
+        insertStmt.clearParameters()
+        // Bind the 26 MRMAP columns in file order; JDBC parameter indexes are 1-based, row indexes 0-based.
+        (1 to 26).foreach { index => insertStmt.setString(index, row(index - 1)) }
+        insertStmt.addBatch()
+
+        count += 1
+        if (count % 100000 == 0) {
+          val percentage = count.toFloat/rowCount*100
+          logger.info(f"Batched $count rows out of $rowCount ($percentage%.2f%%), executing.")
+          insertStmt.executeBatch()
+          insertStmt.clearBatch()
+        }
+      } finally source.close()
+      insertStmt.executeBatch()
+
+      // Add indexes. Their names embed the table name so they cannot clash with indexes left on an older MRMAP_ table.
+      regenerate.execute(s"CREATE INDEX INDEX_${tableName}_FROMID ON $tableName (FROMID);")
+      regenerate.execute(s"CREATE INDEX INDEX_${tableName}_TOID ON $tableName (TOID);")
+      regenerate.execute(s"CREATE INDEX INDEX_${tableName}_REL ON $tableName (REL);")
+    } finally conn.close()
+  }
+}
+
+object DbMappings {
+  /** Builds a DbMappings for the given RRF file, using `db` as the source of database connections. */
+  def fromDatabase(db: ConnectionFactory, rrfFile: RRFFile): DbMappings = new DbMappings(db, rrfFile.file, rrfFile.filename)
+}
\ No newline at end of file
diff --git a/src/main/scala/org/renci/umls/rrf/RRFDir.scala b/src/main/scala/org/renci/umls/rrf/RRFDir.scala
index fb3448f..5cb0d3c 100644
--- a/src/main/scala/org/renci/umls/rrf/RRFDir.scala
+++ b/src/main/scala/org/renci/umls/rrf/RRFDir.scala
@@ -3,7 +3,7 @@ package org.renci.umls.rrf
 import java.io.File
 
 import org.apache.commons.dbcp2.DriverManagerConnectionFactory
-import org.renci.umls.db.{DbConcepts, DbHierarchy}
+import org.renci.umls.db.{DbConcepts, DbHierarchy, DbMappings}
 
 import scala.io.Source
 
@@ -48,4 +48,7 @@ class RRFDir(dir: File, sqliteDbFile: File) {
 
   /** Loads MRCONSO.RRF files and makes them available. */
   lazy val concepts: DbConcepts = DbConcepts.fromDatabase(sqliteDb, getRRFFile("MRCONSO.RRF"))
+
+  /** Loads MRMAP.RRF files and makes them available. Lazy, like `concepts`, so the load only runs on first use. */
+  lazy val mappings: DbMappings = DbMappings.fromDatabase(sqliteDb, getRRFFile("MRMAP.RRF"))
 }
diff --git a/src/main/scala/org/renci/umls/rrf/RRFMappings.scala b/src/main/scala/org/renci/umls/rrf/RRFMappings.scala
new file mode 100644
index 0000000..721ec15
--- /dev/null
+++ b/src/main/scala/org/renci/umls/rrf/RRFMappings.scala
@@ -0,0 +1,78 @@
+package org.renci.umls.rrf
+
+import java.io.File
+
+/** Represents a single mapping entry (one row of MRMAP.RRF); each field is annotated with the MRMAP column it holds. */
+case class UMLSMapping(
+  mapSet: String,       // MAPSETCUI	Unique identifier for the UMLS concept which represents the whole map set.
+  mapSetSource: String, // MAPSETSAB	Source abbreviation (SAB) for the provider of the map set.
+  mapSubsetId: String,  // MAPSUBSETID	Map subset identifier used to identify a subset of related mappings within a map set. This is used for cases where the FROMEXPR may have more than one potential mapping (optional).
+  mapRank: String,      // MAPRANK	Order in which mappings in a subset should be applied. Used only where MAPSUBSETID is used. (optional)
+  mapId: String,        // MAPID	Unique identifier for this individual mapping. Primary key of this table to identify a particular row.
+  mapSourceId: String,  // MAPSID	Source asserted identifier for this mapping (optional).
+  fromId: String,       // FROMID	Identifier for the entity being mapped from. This is an internal UMLS identifier used to point to an external entity in a source vocabulary (represented by the FROMEXPR). When the source provides such an identifier, it is reused here. Otherwise, it is generated by NLM. The FROMID is only unique within a map set. It is not a pointer to UMLS entities like atoms or concepts. There is a one-to-one correlation between FROMID and a unique set of values in FROMSID, FROMEXPR, FROMTYPE, FROMRULE, and FROMRES within a map set.
+  fromSourceId: String, // FROMSID	Source asserted identifier for the entity being mapped from (optional).
+  fromExpr: String,     // FROMEXPR	Entity being mapped from - can be a single code/identifier/concept name or a complex expression involving multiple codes/identifiers/concept names, Boolean operators and/or punctuation.
+  fromType: String,     // FROMTYPE	Type of entity being mapped from.
+  fromRule: String,     // FROMRULE	Machine processable rule applicable to the entity being mapped from (optional).
+  fromRestriction: String, // FROMRES	Restriction applicable to the entity being mapped from (optional).
+  relationship: String, // REL	Relationship of the entity being mapped from to the entity being mapped to.
+  relationshipAdditionalLabel: String, // RELA	Additional relationship label (optional).
+  toId: String,         // TOID	Identifier for the entity being mapped to. This is an internal identifier used to point to an external entity in a source vocabulary (represented by the TOEXPR). When the source provides such an identifier, it is reused here. Otherwise, it is generated by NLM. The TOID is only unique within a map set. It is not a pointer to UMLS entities like atoms or concepts. There is a one-to-one correlation between TOID and a unique set of values in TOSID, TOEXPR, TOTYPE, TORULE, TORES within a map set.
+  toSourceId: String,   // TOSID	Source asserted identifier for the entity being mapped to (optional).
+  toExpr: String,       // TOEXPR	Entity being mapped to - can be a single code/identifier/concept name or a complex expression involving multiple codes/identifiers/concept names, Boolean operators and/or punctuation.
+  toType: String,       // TOTYPE	Type of entity being mapped to.
+  toRule: String,       // TORULE	Machine processable rule applicable to the entity being mapped to (optional).
+  toRestriction: String, // TORES	Restriction applicable to the entity being mapped to (optional).
+  mapRule: String,      // MAPRULE	Machine processable rule applicable to this mapping (optional).
+  mapRestriction: String, // MAPRES	Restriction applicable to this mapping (optional).
+  mapType: String,      // MAPTYPE	Type of mapping (optional).
+  mapAttributeName: String, // MAPATN	The name of the attribute associated with this mapping [not yet in use].
+  mapAttributeValue: String, // MAPATV	The value of the attribute associated with this mapping [not yet in use].
+  contentViewFlag: String // CVF	The Content View Flag is a bit field used to indicate membership in a content view.
+)
+
+/**
+  * The RRFMappings file allows you to read mapping data from MRMAP.RRF.
+  */
+class RRFMappings(file: File, filename: String = "MRMAP.RRF") extends RRFFile(file, filename) {
+  /** Every row of this file as a UMLSMapping (despite the method name, these are mappings rather than concepts). */
+  def concepts(): Seq[UMLSMapping] = {
+    // The column order is hard-coded for now: the N-th field of UMLSMapping is read from the N-th
+    // column of the row, so the indexes below must run from 0 through 25 without gaps. Eventually,
+    // it'd be nice to have this automatically settable from MRFILES.RRF itself.
+    rows.map(arr => UMLSMapping(
+      arr(0),
+      arr(1),
+      arr(2),
+      arr(3),
+      arr(4),
+      arr(5),
+      arr(6),
+      arr(7),
+      arr(8),
+      arr(9),
+      arr(10),
+      arr(11),
+      arr(12),
+      arr(13),
+      arr(14),
+      arr(15),
+      arr(16),
+      arr(17),
+      arr(18),
+      arr(19),
+      arr(20),
+      arr(21),
+      arr(22),
+      arr(23),
+      arr(24),
+      arr(25)
+    ))
+  }
+}
+
+object RRFMappings {
+  /** Wrap an RRF file as an RRFMappings. */
+  def fromRRF(rrfFile: RRFFile): RRFMappings = new RRFMappings(rrfFile.file, rrfFile.filename)
+}
\ No newline at end of file