From de712ab1f6c5292b70b7f3a7aa0f1df4881caa52 Mon Sep 17 00:00:00 2001 From: Caio Camatta Date: Mon, 12 Feb 2024 16:18:12 -0500 Subject: [PATCH] Add FetcherCache and tests --- .../ai/chronon/online/FetcherCache.scala | 214 ++++++++++ .../ai/chronon/online/FetcherCacheTest.scala | 381 ++++++++++++++++++ 2 files changed, 595 insertions(+) create mode 100644 online/src/main/scala/ai/chronon/online/FetcherCache.scala create mode 100644 online/src/test/scala/ai/chronon/online/FetcherCacheTest.scala diff --git a/online/src/main/scala/ai/chronon/online/FetcherCache.scala b/online/src/main/scala/ai/chronon/online/FetcherCache.scala new file mode 100644 index 000000000..b67fd57d9 --- /dev/null +++ b/online/src/main/scala/ai/chronon/online/FetcherCache.scala @@ -0,0 +1,214 @@ +package ai.chronon.online + +import ai.chronon.aggregator.windowing.FinalBatchIr +import ai.chronon.api.Extensions.MetadataOps +import ai.chronon.api.GroupBy +import ai.chronon.online.FetcherBase.GroupByRequestMeta +import ai.chronon.online.Fetcher.Request +import ai.chronon.online.FetcherCache.{ + BatchIrCache, + BatchResponses, + CachedBatchResponse, + CachedFinalIrBatchResponse, + CachedMapBatchResponse, + KvStoreBatchResponse +} +import ai.chronon.online.KVStore.{GetRequest, TimedValue} +import com.github.benmanes.caffeine.cache.{Cache => CaffeineCache} + +import scala.util.{Success, Try} +import java.util.concurrent.ConcurrentHashMap +import scala.collection.JavaConverters.mapAsScalaConcurrentMapConverter + +/* + * FetcherCache is an extension to FetcherBase that provides caching functionality. It caches KV store + * requests to decrease feature serving latency. + * */ +trait FetcherCache { + val batchIrCacheName = "batch_cache" + val maybeBatchIrCache: Option[BatchIrCache] = + Option(System.getProperty("ai.chronon.fetcher.batch_ir_cache_size")) + .map(size => new BatchIrCache(batchIrCacheName, size.toInt)) + .orElse(None) + + def isCacheSizeConfigured: Boolean = maybeBatchIrCache.isDefined + + // Memoize which GroupBys have caching enabled + private[online] val isCachingEnabledForGroupBy: collection.concurrent.Map[String, Boolean] = + new ConcurrentHashMap[String, Boolean]().asScala + + def isCachingEnabled(groupBy: GroupBy): Boolean = { + if (!isCacheSizeConfigured || groupBy.getMetaData == null || groupBy.getMetaData.getName == null) return false + + val groupByName = groupBy.getMetaData.getName + isCachingEnabledForGroupBy.getOrElse( + groupByName, { + groupBy.getMetaData.customJsonLookUp("enable_caching") match { + case b: Boolean => + println(s"Caching is ${if (b) "enabled" else "disabled"} for $groupByName") + isCachingEnabledForGroupBy.putIfAbsent(groupByName, b) + b + case null => + println(s"Caching is disabled for $groupByName, enable_caching is not set.") + isCachingEnabledForGroupBy.putIfAbsent(groupByName, false) + false + case _ => false + } + } + ) + } + + protected val caffeineMetricsContext: Metrics.Context = Metrics.Context(Metrics.Environment.JoinFetching) + + /** + * Obtain the Map[String, AnyRef] response from a batch response. + * + * If batch IR caching is enabled, this method will try to fetch the IR from the cache. If it's not in the cache, + * it will decode it from the batch bytes and store it. + * + * @param batchResponses the batch responses + * @param batchBytes the batch bytes corresponding to the batchResponses. Can be `null`. + * @param servingInfo the GroupByServingInfoParsed that contains the info to decode the bytes + * @param decodingFunction the function to decode bytes into Map[String, AnyRef] + * @param keys the keys used to fetch this particular batch response, for caching purposes + */ + private[online] def getMapResponseFromBatchResponse(batchResponses: BatchResponses, + batchBytes: Array[Byte], + decodingFunction: Array[Byte] => Map[String, AnyRef], + servingInfo: GroupByServingInfoParsed, + keys: Map[String, Any]): Map[String, AnyRef] = { + if (!isCachingEnabled(servingInfo.groupBy)) return decodingFunction(batchBytes) + + batchResponses match { + case _: KvStoreBatchResponse => + val batchRequestCacheKey = + BatchIrCache.Key(servingInfo.groupByOps.batchDataset, keys, servingInfo.batchEndTsMillis) + val decodedBytes = decodingFunction(batchBytes) + if (decodedBytes != null) + maybeBatchIrCache.get.cache.put(batchRequestCacheKey, CachedMapBatchResponse(decodedBytes)) + decodedBytes + case cachedResponse: CachedBatchResponse => + cachedResponse match { + case CachedFinalIrBatchResponse(_: FinalBatchIr) => decodingFunction(batchBytes) + case CachedMapBatchResponse(mapResponse: Map[String, AnyRef]) => mapResponse + } + } + } + + /** + * Obtain the FinalBatchIr from a batch response. + * + * If batch IR caching is enabled, this method will try to fetch the IR from the cache. If it's not in the cache, + * it will decode it from the batch bytes and store it. + * + * @param batchResponses the batch responses + * @param batchBytes the batch bytes corresponding to the batchResponses. Can be `null`. + * @param servingInfo the GroupByServingInfoParsed that contains the info to decode the bytes + * @param decodingFunction the function to decode bytes into FinalBatchIr + * @param keys the keys used to fetch this particular batch response, for caching purposes + */ + private[online] def getBatchIrFromBatchResponse( + batchResponses: BatchResponses, + batchBytes: Array[Byte], + servingInfo: GroupByServingInfoParsed, + decodingFunction: (Array[Byte], GroupByServingInfoParsed) => FinalBatchIr, + keys: Map[String, Any]): FinalBatchIr = { + if (!isCachingEnabled(servingInfo.groupBy)) return decodingFunction(batchBytes, servingInfo) + + batchResponses match { + case _: KvStoreBatchResponse => + val batchRequestCacheKey = + BatchIrCache.Key(servingInfo.groupByOps.batchDataset, keys, servingInfo.batchEndTsMillis) + val decodedBytes = decodingFunction(batchBytes, servingInfo) + if (decodedBytes != null) + maybeBatchIrCache.get.cache.put(batchRequestCacheKey, CachedFinalIrBatchResponse(decodedBytes)) + decodedBytes + case cachedResponse: CachedBatchResponse => + cachedResponse match { + case CachedFinalIrBatchResponse(finalBatchIr: FinalBatchIr) => finalBatchIr + case CachedMapBatchResponse(_: Map[String, AnyRef]) => decodingFunction(batchBytes, servingInfo) + } + } + } + + /** + * Given a list of GetRequests, return a map of GetRequests to cached FinalBatchIrs. + */ + def getCachedRequests( + groupByRequestToKvRequest: Seq[(Request, Try[GroupByRequestMeta])]): Map[GetRequest, CachedBatchResponse] = { + if (!isCacheSizeConfigured) return Map.empty + + groupByRequestToKvRequest + .map { + case (request, Success(GroupByRequestMeta(servingInfo, batchRequest, _, _, _))) => + if (!isCachingEnabled(servingInfo.groupBy)) { Map.empty } + else { + val batchRequestCacheKey = + BatchIrCache.Key(batchRequest.dataset, request.keys, servingInfo.batchEndTsMillis) + + // Metrics so we can get per-groupby cache metrics + val metricsContext = + request.context.getOrElse(Metrics.Context(Metrics.Environment.JoinFetching, servingInfo.groupBy)) + + maybeBatchIrCache.get.cache.getIfPresent(batchRequestCacheKey) match { + case null => + metricsContext.increment(s"${batchIrCacheName}_gb_misses") + val emptyMap: Map[GetRequest, CachedBatchResponse] = Map.empty + emptyMap + case cachedIr: CachedBatchResponse => + metricsContext.increment(s"${batchIrCacheName}_gb_hits") + Map(batchRequest -> cachedIr) + } + } + case _ => + val emptyMap: Map[GetRequest, CachedBatchResponse] = Map.empty + emptyMap + } + .foldLeft(Map.empty[GetRequest, CachedBatchResponse])(_ ++ _) + } +} + +object FetcherCache { + private[online] class BatchIrCache(val cacheName: String, val maximumSize: Int = 10000) { + import BatchIrCache._ + + val cache: CaffeineCache[Key, Value] = + LRUCache[Key, Value](cacheName = cacheName, maximumSize = maximumSize) + } + + private[online] object BatchIrCache { + // We use the dataset, keys, and batchEndTsMillis to identify a batch request. + // There's one edge case to be aware of: if a batch job is re-run in the same day, the batchEndTsMillis will + // be the same but the underlying data may have have changed. If that new batch data is needed immediately, the + // Fetcher service should be restarted. + case class Key(dataset: String, keys: Map[String, Any], batchEndTsMillis: Long) + + // FinalBatchIr is for GroupBys using temporally accurate aggregation. + // Map[String, Any] is for GroupBys using snapshot accurate aggregation or no aggregation. + type Value = BatchResponses + } + + // BatchResponses encapsulates either a batch response from kv store or a cached batch response. + sealed abstract class BatchResponses { + def getBatchBytes(batchEndTsMillis: Long): Array[Byte] + } + object BatchResponses { + def apply(kvStoreResponse: Try[Seq[TimedValue]]): KvStoreBatchResponse = KvStoreBatchResponse(kvStoreResponse) + def apply(cachedResponse: FinalBatchIr): CachedFinalIrBatchResponse = CachedFinalIrBatchResponse(cachedResponse) + def apply(cachedResponse: Map[String, AnyRef]): CachedMapBatchResponse = CachedMapBatchResponse(cachedResponse) + } + case class KvStoreBatchResponse(response: Try[Seq[TimedValue]]) extends BatchResponses { + def getBatchBytes(batchEndTsMillis: Long): Array[Byte] = + response + .map(_.maxBy(_.millis)) + .filter(_.millis >= batchEndTsMillis) + .map(_.bytes) + .getOrElse(null) + } + sealed abstract class CachedBatchResponse extends BatchResponses { + // This is the case where we don't have bytes because the decoded IR was cached so we didn't hit the KV store again. + def getBatchBytes(batchEndTsMillis: Long): Null = null + } + case class CachedFinalIrBatchResponse(response: FinalBatchIr) extends CachedBatchResponse + case class CachedMapBatchResponse(response: Map[String, AnyRef]) extends CachedBatchResponse +} diff --git a/online/src/test/scala/ai/chronon/online/FetcherCacheTest.scala b/online/src/test/scala/ai/chronon/online/FetcherCacheTest.scala new file mode 100644 index 000000000..bb476b2f0 --- /dev/null +++ b/online/src/test/scala/ai/chronon/online/FetcherCacheTest.scala @@ -0,0 +1,381 @@ +package ai.chronon.online + +import ai.chronon.aggregator.windowing.FinalBatchIr +import ai.chronon.api.Extensions.GroupByOps +import ai.chronon.api.{Builders, GroupBy} +import ai.chronon.online.FetcherBase._ +import ai.chronon.online.{AvroCodec, FetcherCache, GroupByServingInfoParsed, KVStore, Metrics} +import ai.chronon.online.Fetcher.Request +import ai.chronon.online.FetcherCache.{BatchIrCache, BatchResponses, CachedMapBatchResponse} +import ai.chronon.online.KVStore.TimedValue +import ai.chronon.online.Metrics.Context +import org.junit.Assert.{assertArrayEquals, assertEquals, assertFalse, assertNull, assertTrue, fail} +import org.junit.Test +import org.mockito.Mockito._ +import org.mockito.ArgumentMatchers.any +import org.mockito.Mockito +import org.mockito.stubbing.Stubber +import org.scalatestplus.mockito.MockitoSugar + +import scala.collection.JavaConverters._ +import scala.util.{Failure, Success, Try} + +trait MockitoHelper extends MockitoSugar { + // Overriding doReturn to fix a known Java/Scala interoperability issue when using Mockito. ("doReturn: ambiguous + // reference to overloaded definition"). An alternative would be to use the 'mockito-scala' library. + def doReturn(toBeReturned: Any): Stubber = { + Mockito.doReturn(toBeReturned, Nil: _*) + } +} + +class FetcherCacheTest extends MockitoHelper { + class TestableFetcherCache(cache: Option[BatchIrCache]) extends FetcherCache { + override val maybeBatchIrCache: Option[BatchIrCache] = cache + } + val batchIrCacheMaximumSize = 50 + + @Test + def test_BatchIrCache_CorrectlyCachesBatchIrs(): Unit = { + val cacheName = "test" + val batchIrCache = new BatchIrCache(cacheName, batchIrCacheMaximumSize) + val dataset = "TEST_GROUPBY_BATCH" + val batchEndTsMillis = 1000L + + def createBatchir(i: Int) = + BatchResponses(FinalBatchIr(collapsed = Array(i), tailHops = Array(Array(Array(i)), Array(Array(i))))) + def createCacheKey(i: Int) = BatchIrCache.Key(dataset, Map("key" -> i), batchEndTsMillis) + + // Create a bunch of test batchIrs and store them in cache + val batchIrs: Map[BatchIrCache.Key, BatchIrCache.Value] = + (0 until batchIrCacheMaximumSize).map(i => createCacheKey(i) -> createBatchir(i)).toMap + batchIrCache.cache.putAll(batchIrs.asJava) + + // Check that the cache contains all the batchIrs we created + batchIrs.foreach(entry => { + val cachedBatchIr = batchIrCache.cache.getIfPresent(entry._1) + assertEquals(cachedBatchIr, entry._2) + }) + } + + @Test + def test_BatchIrCache_CorrectlyCachesMapResponse(): Unit = { + val cacheName = "test" + val batchIrCache = new BatchIrCache(cacheName, batchIrCacheMaximumSize) + val dataset = "TEST_GROUPBY_BATCH" + val batchEndTsMillis = 1000L + + def createMapResponse(i: Int) = + BatchResponses(Map("group_by_key" -> i.asInstanceOf[AnyRef])) + def createCacheKey(i: Int) = BatchIrCache.Key(dataset, Map("key" -> i), batchEndTsMillis) + + // Create a bunch of test mapResponses and store them in cache + val mapResponses: Map[BatchIrCache.Key, BatchIrCache.Value] = + (0 until batchIrCacheMaximumSize).map(i => createCacheKey(i) -> createMapResponse(i)).toMap + batchIrCache.cache.putAll(mapResponses.asJava) + + // Check that the cache contains all the mapResponses we created + mapResponses.foreach(entry => { + val cachedBatchIr = batchIrCache.cache.getIfPresent(entry._1) + assertEquals(cachedBatchIr, entry._2) + }) + } + + // Test that the cache keys are compared by equality, not by reference. In practice, this means that if two keys + // have the same (dataset, keys, batchEndTsMillis), they will only be stored once in the cache. + @Test + def test_BatchIrCache_KeysAreComparedByEquality(): Unit = { + val cacheName = "test" + val batchIrCache = new BatchIrCache(cacheName, batchIrCacheMaximumSize) + + val dataset = "TEST_GROUPBY_BATCH" + val batchEndTsMillis = 1000L + + def createCacheValue(i: Int) = + BatchResponses(FinalBatchIr(collapsed = Array(i), tailHops = Array(Array(Array(i)), Array(Array(i))))) + def createCacheKey(i: Int) = BatchIrCache.Key(dataset, Map("key" -> i), batchEndTsMillis) + + assert(batchIrCache.cache.estimatedSize() == 0) + batchIrCache.cache.put(createCacheKey(1), createCacheValue(1)) + assert(batchIrCache.cache.estimatedSize() == 1) + // Create a second key object with the same values as the first key, make sure it's not stored separately + batchIrCache.cache.put(createCacheKey(1), createCacheValue(1)) + assert(batchIrCache.cache.estimatedSize() == 1) + } + + @Test + def test_getCachedRequests_ReturnsCorrectCachedDataWhenCacheIsEnabled(): Unit = { + val cacheName = "test" + val testCache = Some(new BatchIrCache(cacheName, batchIrCacheMaximumSize)) + val fetcherCache = new TestableFetcherCache(testCache) { + override def isCachingEnabled(groupBy: GroupBy) = true + } + + // Prepare groupByRequestToKvRequest + val batchEndTsMillis = 0L + val keys = Map("key" -> "value") + val eventTs = 1000L + val dataset = "TEST_GROUPBY_BATCH" + val mockGroupByServingInfoParsed = mock[GroupByServingInfoParsed] + val mockContext = mock[Metrics.Context] + val request = Request("req_name", keys, Some(eventTs), Some(mock[Context])) + val getRequest = KVStore.GetRequest("key".getBytes, dataset, Some(eventTs)) + val requestMeta = + GroupByRequestMeta(mockGroupByServingInfoParsed, getRequest, Some(getRequest), Some(eventTs), mockContext) + val groupByRequestToKvRequest: Seq[(Request, Try[GroupByRequestMeta])] = Seq((request, Success(requestMeta))) + + // getCachedRequests should return an empty list when the cache is empty + val cachedRequestBeforePopulating = fetcherCache.getCachedRequests(groupByRequestToKvRequest) + assert(cachedRequestBeforePopulating.isEmpty) + + // Add a GetRequest and a FinalBatchIr + val key = BatchIrCache.Key(getRequest.dataset, keys, batchEndTsMillis) + val finalBatchIr = BatchResponses(FinalBatchIr(Array(1), Array(Array(Array(1)), Array(Array(1))))) + testCache.get.cache.put(key, finalBatchIr) + + // getCachedRequests should return the GetRequest and FinalBatchIr we cached + val cachedRequestsAfterAddingItem = fetcherCache.getCachedRequests(groupByRequestToKvRequest) + assert(cachedRequestsAfterAddingItem.head._1 == getRequest) + assert(cachedRequestsAfterAddingItem.head._2 == finalBatchIr) + } + + @Test + def test_getCachedRequests_DoesNotCacheWhenCacheIsDisabledForGroupBy(): Unit = { + val testCache = new BatchIrCache("test", batchIrCacheMaximumSize) + val spiedTestCache = spy(testCache) + val fetcherCache = new TestableFetcherCache(Some(testCache)) { + // Cache is enabled globally, but disabled for a specific groupBy + override def isCachingEnabled(groupBy: GroupBy) = false + } + + // Prepare groupByRequestToKvRequest + val keys = Map("key" -> "value") + val eventTs = 1000L + val dataset = "TEST_GROUPBY_BATCH" + val mockGroupByServingInfoParsed = mock[GroupByServingInfoParsed] + val mockContext = mock[Metrics.Context] + val request = Request("req_name", keys, Some(eventTs)) + val getRequest = KVStore.GetRequest("key".getBytes, dataset, Some(eventTs)) + val requestMeta = + GroupByRequestMeta(mockGroupByServingInfoParsed, getRequest, Some(getRequest), Some(eventTs), mockContext) + val groupByRequestToKvRequest: Seq[(Request, Try[GroupByRequestMeta])] = Seq((request, Success(requestMeta))) + + val cachedRequests = fetcherCache.getCachedRequests(groupByRequestToKvRequest) + assert(cachedRequests.isEmpty) + // Cache was never called + verify(spiedTestCache, never()).cache + } + + @Test + def test_getBatchBytes_ReturnsLatestTimedValueBytesIfGreaterThanBatchEnd(): Unit = { + val kvStoreResponse = Success( + Seq(TimedValue(Array(1.toByte), 1000L), TimedValue(Array(2.toByte), 2000L)) + ) + val batchResponses = BatchResponses(kvStoreResponse) + val batchBytes = batchResponses.getBatchBytes(1500L) + assertArrayEquals(Array(2.toByte), batchBytes) + } + + @Test + def test_getBatchBytes_ReturnsNullIfLatestTimedValueTimestampIsLessThanBatchEnd(): Unit = { + val kvStoreResponse = Success( + Seq(TimedValue(Array(1.toByte), 1000L), TimedValue(Array(2.toByte), 1500L)) + ) + val batchResponses = BatchResponses(kvStoreResponse) + val batchBytes = batchResponses.getBatchBytes(2000L) + assertNull(batchBytes) + } + + @Test + def test_getBatchBytes_ReturnsNullWhenCachedBatchResponse(): Unit = { + val finalBatchIr = mock[FinalBatchIr] + val batchResponses = BatchResponses(finalBatchIr) + val batchBytes = batchResponses.getBatchBytes(1000L) + assertNull(batchBytes) + } + + @Test + def test_getBatchBytes_ReturnsNullWhenKvStoreBatchResponseFails(): Unit = { + val kvStoreResponse = Failure(new RuntimeException("KV Store error")) + val batchResponses = BatchResponses(kvStoreResponse) + val batchBytes = batchResponses.getBatchBytes(1000L) + assertNull(batchBytes) + } + + @Test + def test_getBatchIrFromBatchResponse_ReturnsCorrectIRsWithCacheEnabled(): Unit = { + // Use a real cache + val batchIrCache = new BatchIrCache("test_cache", batchIrCacheMaximumSize) + + // Create all necessary mocks + val servingInfo = mock[GroupByServingInfoParsed] + val groupByOps = mock[GroupByOps] + val toBatchIr = mock[(Array[Byte], GroupByServingInfoParsed) => FinalBatchIr] + when(servingInfo.groupByOps).thenReturn(groupByOps) + when(groupByOps.batchDataset).thenReturn("test_dataset") + when(servingInfo.groupByOps.batchDataset).thenReturn("test_dataset") + when(servingInfo.batchEndTsMillis).thenReturn(1000L) + + // Dummy data + val batchBytes = Array[Byte](1, 1) + val keys = Map("key" -> "value") + val cacheKey = BatchIrCache.Key(servingInfo.groupByOps.batchDataset, keys, servingInfo.batchEndTsMillis) + + val fetcherCache = new TestableFetcherCache(Some(batchIrCache)) + val spiedFetcherCache = Mockito.spy(fetcherCache) + doReturn(true).when(spiedFetcherCache).isCachingEnabled(any()) + + // 1. Cached BatchResponse returns the same IRs passed in + val finalBatchIr1 = mock[FinalBatchIr] + val cachedBatchResponse = BatchResponses(finalBatchIr1) + val cachedIr = + spiedFetcherCache.getBatchIrFromBatchResponse(cachedBatchResponse, batchBytes, servingInfo, toBatchIr, keys) + assertEquals(finalBatchIr1, cachedIr) + verify(toBatchIr, never())(any(classOf[Array[Byte]]), any()) // no decoding needed + + // 2. Un-cached BatchResponse has IRs added to cache + val finalBatchIr2 = mock[FinalBatchIr] + val kvStoreBatchResponses = BatchResponses(Success(Seq(TimedValue(batchBytes, 1000L)))) + when(toBatchIr(any(), any())).thenReturn(finalBatchIr2) + val uncachedIr = + spiedFetcherCache.getBatchIrFromBatchResponse(kvStoreBatchResponses, batchBytes, servingInfo, toBatchIr, keys) + assertEquals(finalBatchIr2, uncachedIr) + assertEquals(batchIrCache.cache.getIfPresent(cacheKey), BatchResponses(finalBatchIr2)) // key was added + verify(toBatchIr, times(1))(any(), any()) // decoding did happen + } + + @Test + def test_getBatchIrFromBatchResponse_DecodesBatchBytesIfCacheDisabled(): Unit = { + // Set up mocks and dummy data + val servingInfo = mock[GroupByServingInfoParsed] + val batchBytes = Array[Byte](1, 2, 3) + val keys = Map("key" -> "value") + val finalBatchIr = mock[FinalBatchIr] + val toBatchIr = mock[(Array[Byte], GroupByServingInfoParsed) => FinalBatchIr] + val kvStoreBatchResponses = BatchResponses(Success(Seq(TimedValue(batchBytes, 1000L)))) + + val spiedFetcherCache = Mockito.spy(new TestableFetcherCache(None)) + when(toBatchIr(any(), any())).thenReturn(finalBatchIr) + + // When getBatchIrFromBatchResponse is called, it decodes the bytes and doesn't hit the cache + val ir = + spiedFetcherCache.getBatchIrFromBatchResponse(kvStoreBatchResponses, batchBytes, servingInfo, toBatchIr, keys) + verify(toBatchIr, times(1))(batchBytes, servingInfo) // decoding did happen + assertEquals(finalBatchIr, ir) + } + + @Test + def test_getBatchIrFromBatchResponse_ReturnsCorrectMapResponseWithCacheEnabled(): Unit = { + // Use a real cache + val batchIrCache = new BatchIrCache("test_cache", batchIrCacheMaximumSize) + // Set up mocks and dummy data + val servingInfo = mock[GroupByServingInfoParsed] + val groupByOps = mock[GroupByOps] + val outputCodec = mock[AvroCodec] + when(servingInfo.groupByOps).thenReturn(groupByOps) + when(groupByOps.batchDataset).thenReturn("test_dataset") + when(servingInfo.groupByOps.batchDataset).thenReturn("test_dataset") + when(servingInfo.batchEndTsMillis).thenReturn(1000L) + val batchBytes = Array[Byte](1, 2, 3) + val keys = Map("key" -> "value") + val cacheKey = BatchIrCache.Key(servingInfo.groupByOps.batchDataset, keys, servingInfo.batchEndTsMillis) + + val spiedFetcherCache = Mockito.spy(new TestableFetcherCache(Some(batchIrCache))) + doReturn(true).when(spiedFetcherCache).isCachingEnabled(any()) + + // 1. Cached BatchResponse returns the same Map responses passed in + val mapResponse1 = mock[Map[String, AnyRef]] + val cachedBatchResponse = BatchResponses(mapResponse1) + val decodingFunction1 = (bytes: Array[Byte]) => { + fail("Decoding function should not be called when batch response is cached") + mapResponse1 + } + val cachedMapResponse = spiedFetcherCache.getMapResponseFromBatchResponse(cachedBatchResponse, + batchBytes, + decodingFunction1, + servingInfo, + keys) + assertEquals(mapResponse1, cachedMapResponse) + + // 2. Un-cached BatchResponse has Map responses added to cache + val mapResponse2 = mock[Map[String, AnyRef]] + val kvStoreBatchResponses = BatchResponses(Success(Seq(TimedValue(batchBytes, 1000L)))) + def decodingFunction2 = (bytes: Array[Byte]) => mapResponse2 + val decodedMapResponse = spiedFetcherCache.getMapResponseFromBatchResponse(kvStoreBatchResponses, + batchBytes, + decodingFunction2, + servingInfo, + keys) + assertEquals(mapResponse2, decodedMapResponse) + assertEquals(batchIrCache.cache.getIfPresent(cacheKey), CachedMapBatchResponse(mapResponse2)) // key was added + } + + @Test + def test_getMapResponseFromBatchResponse_DecodesBatchBytesIfCacheDisabled(): Unit = { + // Set up mocks and dummy data + val servingInfo = mock[GroupByServingInfoParsed] + val batchBytes = Array[Byte](1, 2, 3) + val keys = Map("key" -> "value") + val mapResponse = mock[Map[String, AnyRef]] + val outputCodec = mock[AvroCodec] + val kvStoreBatchResponses = BatchResponses(Success(Seq(TimedValue(batchBytes, 1000L)))) + when(servingInfo.outputCodec).thenReturn(outputCodec) + when(outputCodec.decodeMap(any())).thenReturn(mapResponse) + + val spiedFetcherCache = Mockito.spy(new TestableFetcherCache(None)) + + // When getMapResponseFromBatchResponse is called, it decodes the bytes and doesn't hit the cache + val decodedMapResponse = spiedFetcherCache.getMapResponseFromBatchResponse(kvStoreBatchResponses, + batchBytes, + servingInfo.outputCodec.decodeMap, + servingInfo, + keys) + verify(servingInfo.outputCodec.decodeMap(any()), times(1)) // decoding did happen + assertEquals(mapResponse, decodedMapResponse) + } + + @Test + def test_isCachingEnabled_CorrectlyDetermineIfCacheIsEnabled(): Unit = { + val baseFetcher = new TestableFetcherCache(Some(new BatchIrCache("test", batchIrCacheMaximumSize))) + def buildGroupByWithCustomJson(name: String, customJson: String = null): GroupBy = + Builders.GroupBy( + metaData = Builders.MetaData(name = name, customJson = customJson) + ) + + assertFalse(baseFetcher.isCachingEnabled(buildGroupByWithCustomJson("test_groupby_1"))) + assertFalse(baseFetcher.isCachingEnabled(buildGroupByWithCustomJson("test_groupby_2", "{}"))) + assertTrue( + baseFetcher + .isCachingEnabled(buildGroupByWithCustomJson("test_groupby_3", "{\"enable_caching\": true}")) + ) + assertFalse( + baseFetcher + .isCachingEnabled(buildGroupByWithCustomJson("test_groupby_4", "{\"enable_caching\": false}")) + ) + assertFalse( + baseFetcher + .isCachingEnabled( + buildGroupByWithCustomJson("test_groupby_5", "{\"enable_caching\": \"string instead of bool\"}") + ) + ) + } + + @Test + def test_isCachingEnabled_Memoizes(): Unit = { + val baseFetcher = new TestableFetcherCache(Some(new BatchIrCache("test", batchIrCacheMaximumSize))) + def buildGroupByWithCustomJson(name: String, customJson: String = null): GroupBy = + Builders.GroupBy( + metaData = Builders.MetaData(name = name, customJson = customJson) + ) + + // the map is clean at the start + assertFalse(baseFetcher.isCachingEnabledForGroupBy.contains("test_memo_gb_1")) + assertFalse(baseFetcher.isCachingEnabledForGroupBy.contains("test_memo_gb_2")) + + // memoization works for both true and false + baseFetcher.isCachingEnabled(buildGroupByWithCustomJson("test_memo_gb_1")) + assertFalse(baseFetcher.isCachingEnabledForGroupBy("test_memo_gb_1")) + + baseFetcher.isCachingEnabled(buildGroupByWithCustomJson("test_memo_gb_2", "{\"enable_caching\": true}")) + assertTrue(baseFetcher.isCachingEnabledForGroupBy("test_memo_gb_2")) + } +}