From 01a2c0c77944759c779ae06dc44198f956ab2da9 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Wed, 18 Dec 2024 19:02:44 +0530 Subject: [PATCH 1/8] fix(ingest/kafka): update dependency, tests (#12159) --- metadata-ingestion/setup.py | 2 +- metadata-ingestion/tests/integration/kafka/test_kafka.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 31db711592eb1..6334b3abbb8a0 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -76,7 +76,7 @@ # now provide prebuilt wheels for most platforms, including M1 Macs and # Linux aarch64 (e.g. Docker's linux/arm64). Installing confluent_kafka # from source remains a pain. - "confluent_kafka>=1.9.0", + "confluent_kafka[schemaregistry]>=1.9.0", # We currently require both Avro libraries. The codegen uses avro-python3 (above) # schema parsers at runtime for generating and reading JSON into Python objects. # At the same time, we use Kafka's AvroSerializer, which internally relies on diff --git a/metadata-ingestion/tests/integration/kafka/test_kafka.py b/metadata-ingestion/tests/integration/kafka/test_kafka.py index 0d9a714625e96..648c4b26b20a7 100644 --- a/metadata-ingestion/tests/integration/kafka/test_kafka.py +++ b/metadata-ingestion/tests/integration/kafka/test_kafka.py @@ -102,7 +102,7 @@ def test_kafka_test_connection(mock_kafka_service, config_dict, is_success): test_connection_helpers.assert_capability_report( capability_report=report.capability_report, failure_capabilities={ - SourceCapability.SCHEMA_METADATA: "Failed to establish a new connection" + SourceCapability.SCHEMA_METADATA: "[Errno 111] Connection refused" }, ) From 8c724dbf47dd76a4aefec0a93267e08ddeda7e58 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Wed, 18 Dec 2024 12:45:38 -0600 Subject: [PATCH 2/8] feat(api): authorization extended for soft-delete and suspend (#12158) --- datahub-frontend/app/auth/AuthModule.java | 2 + .../upgrade/config/SystemUpdateConfig.java | 2 + .../restorebackup/RestoreStorageStep.java | 2 +- .../upgrade/system/AbstractMCLStep.java | 3 +- .../bootstrapmcps/BootstrapMCPUtil.java | 4 +- ...ateSchemaFieldsFromSchemaMetadataStep.java | 10 +- ...chemaFieldsFromSchemaMetadataStepTest.java | 3 +- .../aspect/CachingAspectRetriever.java | 36 +++- .../metadata/aspect/GraphRetriever.java | 23 +++ .../metadata/entity/SearchRetriever.java | 19 ++ .../metadata/aspect/MockAspectRetriever.java | 4 +- .../java/com/linkedin/metadata/Constants.java | 2 + .../ebean/batch/AspectsBatchImplTest.java | 8 +- .../aspect/utils/DefaultAspectsUtil.java | 2 +- .../client/EntityClientAspectRetriever.java | 7 +- .../metadata/client/JavaEntityClient.java | 21 ++- .../client/SystemJavaEntityClient.java | 2 +- .../entity/EntityServiceAspectRetriever.java | 10 +- .../metadata/entity/EntityServiceImpl.java | 67 +++---- .../linkedin/metadata/entity/EntityUtils.java | 2 +- .../cassandra/CassandraRetentionService.java | 2 +- .../entity/ebean/EbeanRetentionService.java | 2 +- .../query/filter/BaseQueryFilterRewriter.java | 2 +- .../SearchDocumentTransformer.java | 2 - .../BusinessAttributeUpdateHookService.java | 4 +- .../service/UpdateGraphIndicesService.java | 3 +- .../service/UpdateIndicesService.java | 5 +- .../metadata/AspectIngestionUtils.java | 12 +- .../hooks/IgnoreUnknownMutatorTest.java | 12 +- .../aspect/utils/DefaultAspectsUtilTest.java | 3 +- 
.../DataProductUnsetSideEffectTest.java | 8 +- .../entity/EbeanEntityServiceTest.java | 36 ++-- .../metadata/entity/EntityServiceTest.java | 118 ++++++------ .../cassandra/CassandraEntityServiceTest.java | 11 +- .../ebean/batch/ChangeItemImplTest.java | 4 +- .../RecommendationsServiceTest.java | 3 +- .../SchemaFieldSideEffectTest.java | 12 +- .../ContainerExpansionRewriterTest.java | 5 +- .../filter/DomainExpansionRewriterTest.java | 9 +- .../request/AggregationQueryBuilderTest.java | 9 +- .../request/SearchRequestHandlerTest.java | 1 + .../SearchDocumentTransformerTest.java | 12 ++ ...ropertyDefinitionDeleteSideEffectTest.java | 12 +- .../ShowPropertyAsBadgeValidatorTest.java | 2 +- .../io/datahubproject/test/DataGenerator.java | 5 +- .../MCLSpringCommonTestConfiguration.java | 3 +- .../hook/BusinessAttributeUpdateHookTest.java | 16 +- .../metadata/context/ActorContext.java | 48 +++++ .../metadata/context/OperationContext.java | 123 ++++++++----- .../metadata/context/RetrieverContext.java | 29 +++ .../exception/ActorAccessException.java | 7 + .../exception/OperationContextException.java | 9 + .../context/TestOperationContexts.java | 139 ++++++-------- .../context/OperationContextTest.java | 3 +- .../token/StatefulTokenService.java | 2 +- .../src/main/resources/application.yaml | 6 +- .../SystemOperationContextFactory.java | 14 +- .../IngestDataPlatformInstancesStep.java | 4 +- .../boot/steps/IngestPoliciesStep.java | 2 +- .../GlobalControllerExceptionHandler.java | 14 +- .../controller/GenericEntitiesController.java | 8 +- .../openapi/operations/test/IdController.java | 54 ++++++ .../openapi/util/MappingUtil.java | 2 +- .../v2/controller/EntityController.java | 4 +- .../v3/controller/EntityController.java | 4 +- ...m.linkedin.entity.entitiesV2.restspec.json | 8 + ...m.linkedin.entity.entitiesV2.snapshot.json | 8 + .../linkedin/entity/client/EntityClient.java | 71 ++++++- .../entity/client/RestliEntityClient.java | 13 +- .../client/SystemRestliEntityClient.java | 2 +- .../resources/entity/AspectResource.java | 2 +- .../resources/entity/EntityV2Resource.java | 10 +- .../resources/restli/RestliConstants.java | 3 + .../resources/restli/RestliUtils.java | 8 + .../resources/entity/AspectResourceTest.java | 2 +- .../tokens/revokable_access_token_test.py | 44 +---- .../tests/tokens/session_access_token_test.py | 173 ++++++++++++++++++ smoke-test/tests/tokens/token_utils.py | 53 ++++++ 78 files changed, 980 insertions(+), 431 deletions(-) create mode 100644 metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/ActorAccessException.java create mode 100644 metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/OperationContextException.java rename metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/{ => config}/GlobalControllerExceptionHandler.java (81%) create mode 100644 metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/test/IdController.java create mode 100644 smoke-test/tests/tokens/session_access_token_test.py create mode 100644 smoke-test/tests/tokens/token_utils.py diff --git a/datahub-frontend/app/auth/AuthModule.java b/datahub-frontend/app/auth/AuthModule.java index 7fa99ab3cb262..b95515684f01f 100644 --- a/datahub-frontend/app/auth/AuthModule.java +++ b/datahub-frontend/app/auth/AuthModule.java @@ -27,6 +27,7 @@ import io.datahubproject.metadata.context.EntityRegistryContext; import io.datahubproject.metadata.context.OperationContext; import 
io.datahubproject.metadata.context.OperationContextConfig; +import io.datahubproject.metadata.context.RetrieverContext; import io.datahubproject.metadata.context.SearchContext; import io.datahubproject.metadata.context.ValidationContext; import java.nio.charset.StandardCharsets; @@ -195,6 +196,7 @@ protected OperationContext provideOperationContext( .searchContext(SearchContext.EMPTY) .entityRegistryContext(EntityRegistryContext.builder().build(EmptyEntityRegistry.EMPTY)) .validationContext(ValidationContext.builder().alternateValidation(false).build()) + .retrieverContext(RetrieverContext.EMPTY) .build(systemAuthentication); } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java index 661717c6309cf..fdd84da6044f7 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java @@ -13,6 +13,7 @@ import com.linkedin.gms.factory.kafka.common.TopicConventionFactory; import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.metadata.dao.producer.KafkaEventProducer; import com.linkedin.metadata.dao.producer.KafkaHealthChecker; @@ -186,6 +187,7 @@ protected OperationContext javaSystemOperationContext( components.getIndexConvention(), RetrieverContext.builder() .aspectRetriever(entityServiceAspectRetriever) + .cachingAspectRetriever(CachingAspectRetriever.EMPTY) .graphRetriever(systemGraphRetriever) .searchRetriever(searchServiceSearchRetriever) .build(), diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java index 4d53b603c1eaf..1e5cd6cdb2417 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java @@ -180,7 +180,7 @@ private void readerExecutable(ReaderWrapper reader, UpgradeContext context) { try { aspectRecord = EntityUtils.toSystemAspect( - context.opContext().getRetrieverContext().get(), aspect.toEntityAspect()) + context.opContext().getRetrieverContext(), aspect.toEntityAspect()) .get() .getRecordTemplate(); } catch (Exception e) { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java index cd7947ce3c11a..56feffd211bcd 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java @@ -113,8 +113,7 @@ public Function executable() { List, SystemAspect>> futures; futures = EntityUtils.toSystemAspectFromEbeanAspects( - opContext.getRetrieverContext().get(), - batch.collect(Collectors.toList())) + opContext.getRetrieverContext(), batch.collect(Collectors.toList())) .stream() .map( systemAspect -> { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java 
b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java index 4cc3edff3eb52..5b807c6c450af 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java @@ -100,8 +100,8 @@ static AspectsBatch generateAspectBatch( .collect(Collectors.toList()); return AspectsBatchImpl.builder() - .mcps(mcps, auditStamp, opContext.getRetrieverContext().get()) - .retrieverContext(opContext.getRetrieverContext().get()) + .mcps(mcps, auditStamp, opContext.getRetrieverContext()) + .retrieverContext(opContext.getRetrieverContext()) .build(); } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/GenerateSchemaFieldsFromSchemaMetadataStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/GenerateSchemaFieldsFromSchemaMetadataStep.java index 55bc8edbf6a76..de03538907432 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/GenerateSchemaFieldsFromSchemaMetadataStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/GenerateSchemaFieldsFromSchemaMetadataStep.java @@ -168,13 +168,13 @@ public Function executable() { AspectsBatch aspectsBatch = AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items( batch .flatMap( ebeanAspectV2 -> EntityUtils.toSystemAspectFromEbeanAspects( - opContext.getRetrieverContext().get(), + opContext.getRetrieverContext(), Set.of(ebeanAspectV2)) .stream()) .map( @@ -189,11 +189,7 @@ public Function executable() { .auditStamp(systemAspect.getAuditStamp()) .systemMetadata( withAppSource(systemAspect.getSystemMetadata())) - .build( - opContext - .getRetrieverContext() - .get() - .getAspectRetriever())) + .build(opContext.getAspectRetriever())) .collect(Collectors.toList())) .build(); diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/schemafield/GenerateSchemaFieldsFromSchemaMetadataStepTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/schemafield/GenerateSchemaFieldsFromSchemaMetadataStepTest.java index 3a2728b4e1d3d..04b1095e770e0 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/schemafield/GenerateSchemaFieldsFromSchemaMetadataStepTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/schemafield/GenerateSchemaFieldsFromSchemaMetadataStepTest.java @@ -22,7 +22,6 @@ import com.linkedin.upgrade.DataHubUpgradeState; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RetrieverContext; -import java.util.Optional; import java.util.stream.Stream; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -48,7 +47,7 @@ public void setup() { step = new GenerateSchemaFieldsFromSchemaMetadataStep( mockOpContext, mockEntityService, mockAspectDao, 10, 100, 1000); - when(mockOpContext.getRetrieverContext()).thenReturn(Optional.of(mockRetrieverContext)); + when(mockOpContext.getRetrieverContext()).thenReturn(mockRetrieverContext); } /** Test to verify the correct step ID is returned. 
*/ diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java index 77e799f752455..375dd8cf8911e 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java @@ -1,4 +1,38 @@ package com.linkedin.metadata.aspect; +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; +import com.linkedin.metadata.models.registry.EmptyEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nonnull; + /** Responses can be cached based on application.yaml caching configuration for the EntityClient */ -public interface CachingAspectRetriever extends AspectRetriever {} +public interface CachingAspectRetriever extends AspectRetriever { + + CachingAspectRetriever EMPTY = new EmptyAspectRetriever(); + + class EmptyAspectRetriever implements CachingAspectRetriever { + @Nonnull + @Override + public Map> getLatestAspectObjects( + Set urns, Set aspectNames) { + return Collections.emptyMap(); + } + + @Nonnull + @Override + public Map> getLatestSystemAspects( + Map> urnAspectNames) { + return Collections.emptyMap(); + } + + @Nonnull + @Override + public EntityRegistry getEntityRegistry() { + return EmptyEntityRegistry.EMPTY; + } + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java index f6858e7da4ba6..30a2c1eb9df8c 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java @@ -4,6 +4,7 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.query.filter.SortCriterion; +import java.util.Collections; import java.util.List; import java.util.function.Function; import javax.annotation.Nonnull; @@ -97,4 +98,26 @@ default void consumeRelatedEntities( } } } + + GraphRetriever EMPTY = new EmptyGraphRetriever(); + + class EmptyGraphRetriever implements GraphRetriever { + + @Nonnull + @Override + public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nullable List sourceTypes, + @Nonnull Filter sourceEntityFilter, + @Nullable List destinationTypes, + @Nonnull Filter destinationEntityFilter, + @Nonnull List relationshipTypes, + @Nonnull RelationshipFilter relationshipFilter, + @Nonnull List sortCriterion, + @Nullable String scrollId, + int count, + @Nullable Long startTimeMillis, + @Nullable Long endTimeMillis) { + return new RelatedEntitiesScrollResult(0, 0, null, Collections.emptyList()); + } + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java index eaa106b8d1f63..d4894c97015f8 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java @@ -2,6 +2,7 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntityArray; import java.util.List; import 
javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -21,4 +22,22 @@ ScrollResult scroll( @Nullable Filter filters, @Nullable String scrollId, int count); + + SearchRetriever EMPTY = new EmptySearchRetriever(); + + class EmptySearchRetriever implements SearchRetriever { + + @Override + public ScrollResult scroll( + @Nonnull List entities, + @Nullable Filter filters, + @Nullable String scrollId, + int count) { + ScrollResult empty = new ScrollResult(); + empty.setEntities(new SearchEntityArray()); + empty.setNumEntities(0); + empty.setPageSize(0); + return empty; + } + } } diff --git a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java index 65705f15022b6..98a6d59004a92 100644 --- a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java @@ -5,7 +5,7 @@ import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.Aspect; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.mxe.SystemMetadata; @@ -22,7 +22,7 @@ import javax.annotation.Nonnull; import org.mockito.Mockito; -public class MockAspectRetriever implements AspectRetriever { +public class MockAspectRetriever implements CachingAspectRetriever { private final Map> data; private final Map> systemData = new HashMap<>(); diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index ff6a79108600a..09f873ebf7bc9 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -409,6 +409,8 @@ public class Constants { /** User Status */ public static final String CORP_USER_STATUS_ACTIVE = "ACTIVE"; + public static final String CORP_USER_STATUS_SUSPENDED = "SUSPENDED"; + /** Task Runs */ public static final String DATA_PROCESS_INSTANCE_ENTITY_NAME = "dataProcessInstance"; diff --git a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java index 9f57d36f800de..a3099b9ee21ea 100644 --- a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java +++ b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java @@ -16,7 +16,7 @@ import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.dataset.DatasetProperties; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.patch.GenericJsonPatch; @@ -56,7 +56,7 @@ public class AspectsBatchImplTest { private EntityRegistry testRegistry; - private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockAspectRetriever; private RetrieverContext 
retrieverContext; @BeforeTest @@ -75,12 +75,12 @@ public void beforeTest() throws EntityRegistryException { @BeforeMethod public void setup() { - this.mockAspectRetriever = mock(AspectRetriever.class); + this.mockAspectRetriever = mock(CachingAspectRetriever.class); when(this.mockAspectRetriever.getEntityRegistry()).thenReturn(testRegistry); this.retrieverContext = RetrieverContext.builder() .searchRetriever(mock(SearchRetriever.class)) - .aspectRetriever(mockAspectRetriever) + .cachingAspectRetriever(mockAspectRetriever) .graphRetriever(mock(GraphRetriever.class)) .build(); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java index 99eadd223acd1..82bc0ae1409c5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java @@ -137,7 +137,7 @@ public static List getAdditionalChanges( getProposalFromAspectForDefault( entry.getKey(), entry.getValue(), entityKeyAspect, templateItem), templateItem.getAuditStamp(), - opContext.getAspectRetrieverOpt().get())) + opContext.getAspectRetriever())) .filter(Objects::nonNull); }) .collect(Collectors.toList()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java index bba8324d0c561..669ec751f87c6 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java @@ -35,7 +35,7 @@ public EntityRegistry getEntityRegistry() { @Override public Aspect getLatestAspectObject(@Nonnull Urn urn, @Nonnull String aspectName) { try { - return entityClient.getLatestAspectObject(systemOperationContext, urn, aspectName); + return entityClient.getLatestAspectObject(systemOperationContext, urn, aspectName, false); } catch (RemoteInvocationException | URISyntaxException e) { throw new RuntimeException(e); } @@ -49,7 +49,7 @@ public Map> getLatestAspectObjects( return Map.of(); } else { try { - return entityClient.getLatestAspects(systemOperationContext, urns, aspectNames); + return entityClient.getLatestAspects(systemOperationContext, urns, aspectNames, false); } catch (RemoteInvocationException | URISyntaxException e) { throw new RuntimeException(e); } @@ -70,7 +70,8 @@ public Map> getLatestSystemAspects( urnAspectNames.keySet(), urnAspectNames.values().stream() .flatMap(Collection::stream) - .collect(Collectors.toSet())); + .collect(Collectors.toSet()), + false); } catch (RemoteInvocationException | URISyntaxException e) { throw new RuntimeException(e); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index 29faa3955ea66..3d35f5956b0f4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -106,11 +106,17 @@ public EntityResponse getV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Urn urn, - @Nullable final Set aspectNames) + @Nullable final Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { final Set projectedAspects 
= aspectNames == null ? opContext.getEntityAspectNames(entityName) : aspectNames; - return entityService.getEntityV2(opContext, entityName, urn, projectedAspects); + return entityService.getEntityV2( + opContext, + entityName, + urn, + projectedAspects, + alwaysIncludeKeyAspect == null || alwaysIncludeKeyAspect); } @Override @@ -126,7 +132,8 @@ public Map batchGetV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull Set urns, - @Nullable Set aspectNames) + @Nullable Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { final Set projectedAspects = aspectNames == null ? opContext.getEntityAspectNames(entityName) : aspectNames; @@ -139,7 +146,11 @@ public Map batchGetV2( try { responseMap.putAll( entityService.getEntitiesV2( - opContext, entityName, new HashSet<>(batch), projectedAspects)); + opContext, + entityName, + new HashSet<>(batch), + projectedAspects, + alwaysIncludeKeyAspect == null || alwaysIncludeKeyAspect)); } catch (URISyntaxException e) { throw new RuntimeException(e); } @@ -772,7 +783,7 @@ public List batchIngestProposals( .mcps( batch, auditStamp, - opContext.getRetrieverContext().get(), + opContext.getRetrieverContext(), opContext.getValidationContext().isAlternateValidation()) .build(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java index eda9b3a880228..1d2fd422d7f46 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java @@ -89,6 +89,6 @@ public Map batchGetV2NoCache( @Nonnull Set urns, @Nullable Set aspectNames) throws RemoteInvocationException, URISyntaxException { - return super.batchGetV2(opContext, entityName, urns, aspectNames); + return super.batchGetV2(opContext, entityName, urns, aspectNames, false); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java index 626a1f72f5fb7..50cf8af30d606 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java @@ -5,7 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.entity.Aspect; -import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.models.registry.EntityRegistry; import io.datahubproject.metadata.context.OperationContext; @@ -22,7 +22,7 @@ @Getter @Builder -public class EntityServiceAspectRetriever implements CachingAspectRetriever { +public class EntityServiceAspectRetriever implements AspectRetriever { @Setter private OperationContext systemOperationContext; private final EntityRegistry entityRegistry; @@ -46,7 +46,8 @@ public Map> getLatestAspectObjects( String entityName = urns.stream().findFirst().map(Urn::getEntityType).get(); try { return entityResponseToAspectMap( - entityService.getEntitiesV2(systemOperationContext, entityName, urns, aspectNames)); + entityService.getEntitiesV2( + systemOperationContext, entityName, urns, aspectNames, false)); } catch (URISyntaxException e) { throw new RuntimeException(e); } @@ -71,7 +72,8 @@ public Map> 
getLatestSystemAspects( urnAspectNames.keySet(), urnAspectNames.values().stream() .flatMap(Collection::stream) - .collect(Collectors.toSet())), + .collect(Collectors.toSet()), + false), entityRegistry); } catch (URISyntaxException e) { throw new RuntimeException(e); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 6de7784bfbc0e..8ae09111204ca 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -261,8 +261,7 @@ public Map> getLatestAspects( } List systemAspects = - EntityUtils.toSystemAspects( - opContext.getRetrieverContext().get(), batchGetResults.values()); + EntityUtils.toSystemAspects(opContext.getRetrieverContext(), batchGetResults.values()); systemAspects.stream() // for now, don't add the key aspect here we have already added it above @@ -290,8 +289,7 @@ public Map getLatestAspectsForUrn( Map batchGetResults = getLatestAspect(opContext, new HashSet<>(Arrays.asList(urn)), aspectNames, forUpdate); - return EntityUtils.toSystemAspects( - opContext.getRetrieverContext().get(), batchGetResults.values()) + return EntityUtils.toSystemAspects(opContext.getRetrieverContext(), batchGetResults.values()) .stream() .map( systemAspect -> Pair.of(systemAspect.getAspectName(), systemAspect.getRecordTemplate())) @@ -335,7 +333,7 @@ public Pair getAspectVersionPair( final Optional maybeAspect = Optional.ofNullable(aspectDao.getAspect(primaryKey)); return Pair.of( - EntityUtils.toSystemAspect(opContext.getRetrieverContext().get(), maybeAspect.orElse(null)) + EntityUtils.toSystemAspect(opContext.getRetrieverContext(), maybeAspect.orElse(null)) .map(SystemAspect::getRecordTemplate) .orElse(null), version); @@ -721,7 +719,7 @@ public ListResult listLatestAspects( } return new ListResult<>( - EntityUtils.toSystemAspects(opContext.getRetrieverContext().get(), entityAspects).stream() + EntityUtils.toSystemAspects(opContext.getRetrieverContext(), entityAspects).stream() .map(SystemAspect::getRecordTemplate) .collect(Collectors.toList()), aspectMetadataList.getMetadata(), @@ -758,12 +756,12 @@ public List ingestAspects( .recordTemplate(pair.getValue()) .systemMetadata(systemMetadata) .auditStamp(auditStamp) - .build(opContext.getAspectRetrieverOpt().get())) + .build(opContext.getAspectRetriever())) .collect(Collectors.toList()); return ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -815,13 +813,13 @@ private void processPostCommitMCLSideEffects( log.debug("Considering {} MCLs post commit side effects.", mcls.size()); List batch = mcls.stream() - .map(mcl -> MCLItemImpl.builder().build(mcl, opContext.getAspectRetrieverOpt().get())) + .map(mcl -> MCLItemImpl.builder().build(mcl, opContext.getAspectRetriever())) .collect(Collectors.toList()); Iterable> iterable = () -> Iterators.partition( - AspectsBatch.applyPostMCPSideEffects(batch, opContext.getRetrieverContext().get()) + AspectsBatch.applyPostMCPSideEffects(batch, opContext.getRetrieverContext()) .iterator(), MCP_SIDE_EFFECT_KAFKA_BATCH_SIZE); StreamSupport.stream(iterable.spliterator(), false) @@ -831,7 +829,7 @@ private void processPostCommitMCLSideEffects( ingestProposalAsync( AspectsBatchImpl.builder() .items(sideEffects) - 
.retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .build()) .count(); log.info("Generated {} MCP SideEffects for async processing", count); @@ -879,8 +877,7 @@ private List ingestAspectsToLocalDB( aspectDao.getLatestAspects(urnAspects, true); final Map> batchAspects = - EntityUtils.toSystemAspects( - opContext.getRetrieverContext().get(), databaseAspects); + EntityUtils.toSystemAspects(opContext.getRetrieverContext(), databaseAspects); // read #2 (potentially) final Map> nextVersions = @@ -903,7 +900,7 @@ private List ingestAspectsToLocalDB( Map> newLatestAspects = EntityUtils.toSystemAspects( - opContext.getRetrieverContext().get(), + opContext.getRetrieverContext(), aspectDao.getLatestAspects(updatedItems.getFirst(), true)); // merge updatedLatestAspects = AspectsBatch.merge(batchAspects, newLatestAspects); @@ -941,7 +938,7 @@ private List ingestAspectsToLocalDB( // do final pre-commit checks with previous aspect value ValidationExceptionCollection exceptions = - AspectsBatch.validatePreCommit(changeMCPs, opContext.getRetrieverContext().get()); + AspectsBatch.validatePreCommit(changeMCPs, opContext.getRetrieverContext()); if (exceptions.hasFatalExceptions()) { // IF this is a client request/API request we fail the `transaction batch` @@ -1143,8 +1140,8 @@ public RecordTemplate ingestAspectIfNotPresent( .recordTemplate(newValue) .systemMetadata(systemMetadata) .auditStamp(auditStamp) - .build(opContext.getAspectRetrieverOpt().get()), - opContext.getRetrieverContext().get()) + .build(opContext.getAspectRetriever()), + opContext.getRetrieverContext()) .build(); List ingested = ingestAspects(opContext, aspectsBatch, true, false); @@ -1169,7 +1166,7 @@ public IngestResult ingestProposal( return ingestProposal( opContext, AspectsBatchImpl.builder() - .mcps(List.of(proposal), auditStamp, opContext.getRetrieverContext().get()) + .mcps(List.of(proposal), auditStamp, opContext.getRetrieverContext()) .build(), async) .stream() @@ -1246,7 +1243,7 @@ private Stream ingestTimeseriesProposal( .recordTemplate( EntityApiUtils.buildKeyAspect( opContext.getEntityRegistry(), item.getUrn())) - .build(opContext.getAspectRetrieverOpt().get())) + .build(opContext.getAspectRetriever())) .collect(Collectors.toList()); ingestProposalSync( @@ -1469,7 +1466,7 @@ public List restoreIndices( List systemAspects = EntityUtils.toSystemAspectFromEbeanAspects( - opContext.getRetrieverContext().get(), batch.collect(Collectors.toList())); + opContext.getRetrieverContext(), batch.collect(Collectors.toList())); RestoreIndicesResult result = restoreIndices(opContext, systemAspects, logger); result.timeSqlQueryMs = timeSqlQueryMs; @@ -1513,7 +1510,7 @@ public List restoreIndices( long startTime = System.currentTimeMillis(); List systemAspects = EntityUtils.toSystemAspects( - opContext.getRetrieverContext().get(), + opContext.getRetrieverContext(), getLatestAspect(opContext, entityBatch.getValue(), aspectNames, false).values()); long timeSqlQueryMs = System.currentTimeMillis() - startTime; @@ -1649,12 +1646,12 @@ private RestoreIndicesResult restoreIndices( .auditStamp(auditStamp) .systemMetadata(latestSystemMetadata) .recordTemplate(EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), urn)) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); Stream defaultAspectsResult = ingestProposalSync( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + 
.retrieverContext(opContext.getRetrieverContext()) .items(keyAspect) .build()); defaultAspectsCreated += defaultAspectsResult.count(); @@ -1966,7 +1963,7 @@ private void ingestSnapshotUnion( AspectsBatchImpl aspectsBatch = AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items( aspectRecordsToIngest.stream() .map( @@ -1977,7 +1974,7 @@ private void ingestSnapshotUnion( .recordTemplate(pair.getValue()) .auditStamp(auditStamp) .systemMetadata(systemMetadata) - .build(opContext.getAspectRetrieverOpt().get())) + .build(opContext.getAspectRetriever())) .collect(Collectors.toList())) .build(); @@ -2128,7 +2125,7 @@ public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn) } SystemMetadata latestKeySystemMetadata = - EntityUtils.toSystemAspect(opContext.getRetrieverContext().get(), latestKey) + EntityUtils.toSystemAspect(opContext.getRetrieverContext(), latestKey) .map(SystemAspect::getSystemMetadata) .get(); RollbackResult result = @@ -2253,11 +2250,11 @@ private RollbackResult deleteAspectWithoutMCL( .urn(entityUrn) .aspectName(aspectName) .auditStamp(auditStamp) - .build(opContext.getAspectRetrieverOpt().get()); + .build(opContext.getAspectRetriever()); // Delete validation hooks ValidationExceptionCollection exceptions = - AspectsBatch.validateProposed(List.of(deleteItem), opContext.getRetrieverContext().get()); + AspectsBatch.validateProposed(List.of(deleteItem), opContext.getRetrieverContext()); if (!exceptions.isEmpty()) { throw new ValidationException(collectMetrics(exceptions).toString()); } @@ -2271,7 +2268,7 @@ private RollbackResult deleteAspectWithoutMCL( final EntityAspect.EntitySystemAspect latest = (EntityAspect.EntitySystemAspect) EntityUtils.toSystemAspect( - opContext.getRetrieverContext().get(), + opContext.getRetrieverContext(), aspectDao.getLatestAspect(urn, aspectName, false)) .orElse(null); @@ -2299,7 +2296,7 @@ private RollbackResult deleteAspectWithoutMCL( EntityAspect.EntitySystemAspect candidateAspect = (EntityAspect.EntitySystemAspect) EntityUtils.toSystemAspect( - opContext.getRetrieverContext().get(), + opContext.getRetrieverContext(), aspectDao.getAspect(urn, aspectName, maxVersion)) .orElse(null); SystemMetadata previousSysMetadata = @@ -2325,13 +2322,9 @@ private RollbackResult deleteAspectWithoutMCL( .urn(UrnUtils.getUrn(toDelete.getUrn())) .aspectName(toDelete.getAspect()) .auditStamp(auditStamp) - .build( - opContext - .getRetrieverContext() - .get() - .getAspectRetriever())) + .build(opContext.getAspectRetriever())) .collect(Collectors.toList()), - opContext.getRetrieverContext().get()); + opContext.getRetrieverContext()); if (!preCommitExceptions.isEmpty()) { throw new ValidationException(collectMetrics(preCommitExceptions).toString()); } @@ -2509,7 +2502,7 @@ private Map getEnvelopedAspects( final Map dbEntries = aspectDao.batchGet(dbKeys, false); List envelopedAspects = - EntityUtils.toSystemAspects(opContext.getRetrieverContext().get(), dbEntries.values()); + EntityUtils.toSystemAspects(opContext.getRetrieverContext(), dbEntries.values()); return envelopedAspects.stream() .collect( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java index 3c4109970e9d0..da48a2b76d6d5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java @@ -72,7 
+72,7 @@ public static void ingestChangeProposals( entityService.ingestProposal( opContext, AspectsBatchImpl.builder() - .mcps(changes, getAuditStamp(actor), opContext.getRetrieverContext().get()) + .mcps(changes, getAuditStamp(actor), opContext.getRetrieverContext()) .build(), async); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java index ccc1910ba5cdb..c595e3e07b834 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java @@ -64,7 +64,7 @@ protected AspectsBatch buildAspectsBatch( List mcps, @Nonnull AuditStamp auditStamp) { return AspectsBatchImpl.builder() - .mcps(mcps, auditStamp, opContext.getRetrieverContext().get()) + .mcps(mcps, auditStamp, opContext.getRetrieverContext()) .build(); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java index 49fa555e006f6..74d0d8b0964de 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java @@ -59,7 +59,7 @@ protected AspectsBatch buildAspectsBatch( List mcps, @Nonnull AuditStamp auditStamp) { return AspectsBatchImpl.builder() - .mcps(mcps, auditStamp, opContext.getRetrieverContext().get()) + .mcps(mcps, auditStamp, opContext.getRetrieverContext()) .build(); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java index 367705d369c7c..6c5c6243d3362 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java @@ -143,7 +143,7 @@ private static QueryBuilder expandTerms( if (!queryUrns.isEmpty()) { scrollGraph( - opContext.getRetrieverContext().get().getGraphRetriever(), + opContext.getRetrieverContext().getGraphRetriever(), queryUrns, relationshipTypes, relationshipDirection, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index 4bb8e0630de48..b4ad847cb7afc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -437,8 +437,6 @@ private void setStructuredPropertiesSearchValue( Map> definitions = opContext - .getRetrieverContext() - .get() .getAspectRetriever() .getLatestAspectObjects( propertyMap.keySet(), Set.of(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java index ad2825ead3d0d..4a692e9534622 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java @@ -112,7 +112,7 @@ private void fetchRelatedEntities( @Nullable String scrollId, int consumedEntityCount, int batchNumber) { - GraphRetriever graph = opContext.getRetrieverContext().get().getGraphRetriever(); + GraphRetriever graph = opContext.getRetrieverContext().getGraphRetriever(); final ArrayList> futureList = new ArrayList<>(); RelatedEntitiesScrollResult result = graph.scrollRelatedEntities( @@ -165,7 +165,7 @@ private Callable processBatch( return () -> { StopWatch stopWatch = new StopWatch(); stopWatch.start(); - AspectRetriever aspectRetriever = opContext.getAspectRetrieverOpt().get(); + AspectRetriever aspectRetriever = opContext.getAspectRetriever(); log.info("Batch {} for BA:{} started", batchNumber, entityKey); ExecutionResult executionResult = new ExecutionResult(); executionResult.setBatchNumber(batchNumber); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java index efe073fc00dfd..4b09bc00efb61 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java @@ -94,8 +94,7 @@ public UpdateGraphIndicesService( public void handleChangeEvent( @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { try { - MCLItemImpl mclItem = - MCLItemImpl.builder().build(event, opContext.getAspectRetrieverOpt().get()); + MCLItemImpl mclItem = MCLItemImpl.builder().build(event, opContext.getAspectRetriever()); if (UPDATE_CHANGE_TYPES.contains(event.getChangeType())) { handleUpdateChangeEvent(opContext, mclItem); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index 187ef3e8c6229..c5fc9ebdac9fa 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -121,11 +121,10 @@ public UpdateIndicesService( public void handleChangeEvent( @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { try { - MCLItemImpl batch = - MCLItemImpl.builder().build(event, opContext.getAspectRetrieverOpt().get()); + MCLItemImpl batch = MCLItemImpl.builder().build(event, opContext.getAspectRetriever()); Stream sideEffects = - AspectsBatch.applyMCLSideEffects(List.of(batch), opContext.getRetrieverContext().get()); + AspectsBatch.applyMCLSideEffects(List.of(batch), opContext.getRetrieverContext()); for (MCLItem mclItem : Stream.concat(Stream.of(batch), sideEffects).collect(Collectors.toList())) { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java index 12b12cf105196..fa6ab7932001b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java @@ -46,12 +46,12 @@ public static Map ingestCorpUserKeyAspects( .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - 
.build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); } entityService.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -83,12 +83,12 @@ public static Map ingestCorpUserInfoAspects( .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); } entityService.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -121,12 +121,12 @@ public static Map ingestChartInfoAspects( .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); } entityService.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/aspect/hooks/IgnoreUnknownMutatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/aspect/hooks/IgnoreUnknownMutatorTest.java index 11a3153abcaee..19be1eb14667d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/aspect/hooks/IgnoreUnknownMutatorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/aspect/hooks/IgnoreUnknownMutatorTest.java @@ -16,7 +16,8 @@ import com.linkedin.data.template.StringMap; import com.linkedin.dataset.DatasetProperties; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.entity.SearchRetriever; @@ -28,7 +29,6 @@ import com.linkedin.mxe.SystemMetadata; import com.linkedin.test.metadata.aspect.TestEntityRegistry; import io.datahubproject.metadata.context.RetrieverContext; -import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.util.List; @@ -53,17 +53,17 @@ public class IgnoreUnknownMutatorTest { private static final Urn TEST_DATASET_URN = UrnUtils.getUrn( "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.customers,PROD)"); - private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockAspectRetriever; private RetrieverContext retrieverContext; @BeforeMethod public void setup() { - mockAspectRetriever = mock(AspectRetriever.class); + mockAspectRetriever = mock(CachingAspectRetriever.class); retrieverContext = RetrieverContext.builder() .searchRetriever(mock(SearchRetriever.class)) - .aspectRetriever(mockAspectRetriever) - .graphRetriever(TestOperationContexts.emptyGraphRetriever) + .cachingAspectRetriever(mockAspectRetriever) + .graphRetriever(GraphRetriever.EMPTY) .build(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java 
b/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java index 04aff4edf456d..e7ed267113159 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java @@ -56,8 +56,7 @@ public void testAdditionalChanges() { DefaultAspectsUtil.getAdditionalChanges( opContext, AspectsBatchImpl.builder() - .mcps( - List.of(proposal1), new AuditStamp(), opContext.getRetrieverContext().get()) + .mcps(List.of(proposal1), new AuditStamp(), opContext.getRetrieverContext()) .build() .getMCPItems(), entityServiceImpl, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java index 976b165fea53d..215e1e2431efa 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java @@ -15,7 +15,7 @@ import com.linkedin.dataproduct.DataProductAssociationArray; import com.linkedin.dataproduct.DataProductProperties; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.aspect.batch.MCPItem; @@ -75,12 +75,12 @@ public class DataProductUnsetSideEffectTest { .build())) .build(); - private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockAspectRetriever; private RetrieverContext retrieverContext; @BeforeMethod public void setup() { - mockAspectRetriever = mock(AspectRetriever.class); + mockAspectRetriever = mock(CachingAspectRetriever.class); when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_REGISTRY); GraphRetriever graphRetriever = mock(GraphRetriever.class); RelatedEntities relatedEntities = @@ -139,7 +139,7 @@ public void setup() { retrieverContext = RetrieverContext.builder() .searchRetriever(mock(SearchRetriever.class)) - .aspectRetriever(mockAspectRetriever) + .cachingAspectRetriever(mockAspectRetriever) .graphRetriever(graphRetriever) .build(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index 0386031cbcad8..88f84ee94c8ee 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -19,6 +19,7 @@ import com.linkedin.metadata.AspectGenerationUtils; import com.linkedin.metadata.Constants; import com.linkedin.metadata.EbeanTestUtils; +import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.config.EbeanConfiguration; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; @@ -98,12 +99,15 @@ public void setupTest() { .entityService(_entityServiceImpl) .entityRegistry(_testEntityRegistry) .build()) - .graphRetriever(TestOperationContexts.emptyGraphRetriever) - .searchRetriever(TestOperationContexts.emptySearchRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever( + () 
-> _testEntityRegistry)) + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(SearchRetriever.EMPTY) .build(), null, opContext -> - ((EntityServiceAspectRetriever) opContext.getAspectRetrieverOpt().get()) + ((EntityServiceAspectRetriever) opContext.getAspectRetriever()) .setSystemOperationContext(opContext), null); } @@ -152,25 +156,25 @@ public void testIngestListLatestAspects() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)), + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)), ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)), + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)), ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null))); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null))); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -230,25 +234,25 @@ public void testIngestListUrns() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)), + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)), ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)), + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)), ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null))); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null))); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -310,11 +314,11 @@ public void testSystemMetadataDuplicateKey() throws Exception { .recordTemplate(new Status().setRemoved(true)) .systemMetadata(systemMetadata) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(item)) .build(), false, @@ -356,7 +360,7 @@ public void testSystemMetadataDuplicateKey() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items( List.of( ChangeItemImpl.builder() @@ -365,7 +369,7 @@ public void testSystemMetadataDuplicateKey() throws Exception { .recordTemplate(new Status().setRemoved(false)) .systemMetadata(systemMetadata) .auditStamp(TEST_AUDIT_STAMP) - 
.build(TestOperationContexts.emptyAspectRetriever(null)))) + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)))) .build(), false, true); @@ -600,7 +604,7 @@ public void run() { auditStamp.setTime(System.currentTimeMillis()); AspectsBatchImpl batch = AspectsBatchImpl.builder() - .mcps(mcps, auditStamp, operationContext.getRetrieverContext().get()) + .mcps(mcps, auditStamp, operationContext.getRetrieverContext()) .build(); entityService.ingestProposal(operationContext, batch, false); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 2d59632e6f3c6..c00632e5cf542 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -945,32 +945,32 @@ public void testRollbackAspect() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1037,25 +1037,25 @@ public void testRollbackKey() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(keyAspectName) .recordTemplate(writeKey1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1130,39 +1130,39 @@ public void testRollbackUrn() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(keyAspectName) .recordTemplate(writeKey1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getAspectRetrieverOpt().get()), + 
.build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1208,11 +1208,11 @@ public void testIngestGetLatestAspect() throws AssertionError { .recordTemplate(writeAspect1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1264,11 +1264,11 @@ public void testIngestGetLatestAspect() throws AssertionError { .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata2) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1320,11 +1320,11 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { .recordTemplate(writeAspect1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1347,11 +1347,11 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { .recordTemplate(writeAspect2) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1416,11 +1416,11 @@ public void testIngestSameAspect() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1472,11 +1472,11 @@ public void testIngestSameAspect() throws AssertionError { .recordTemplate(writeAspect2) 
.systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1534,46 +1534,46 @@ public void testRetention() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1a) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1b) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2a) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2b) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1610,18 +1610,18 @@ public void testRetention() throws AssertionError { .recordTemplate(writeAspect1c) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2c) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1982,8 +1982,7 @@ public void testStructuredPropertyIngestProposal() throws Exception { stream .map( entityAspect -> - EntityUtils.toSystemAspect( - opContext.getRetrieverContext().get(), entityAspect) + EntityUtils.toSystemAspect(opContext.getRetrieverContext(), entityAspect) .get() .getAspect(StructuredPropertyDefinition.class)) .collect(Collectors.toSet()); @@ -1995,7 +1994,10 @@ public void testStructuredPropertyIngestProposal() throws Exception { SystemEntityClient 
mockSystemEntityClient = Mockito.mock(SystemEntityClient.class); Mockito.when( mockSystemEntityClient.getLatestAspectObject( - any(OperationContext.class), eq(firstPropertyUrn), eq("propertyDefinition"))) + any(OperationContext.class), + eq(firstPropertyUrn), + eq("propertyDefinition"), + anyBoolean())) .thenReturn(new com.linkedin.entity.Aspect(structuredPropertyDefinition.data())); // Add a value for that property @@ -2062,8 +2064,7 @@ public void testStructuredPropertyIngestProposal() throws Exception { stream .map( entityAspect -> - EntityUtils.toSystemAspect( - opContext.getRetrieverContext().get(), entityAspect) + EntityUtils.toSystemAspect(opContext.getRetrieverContext(), entityAspect) .get() .getAspect(StructuredPropertyDefinition.class)) .collect(Collectors.toSet()); @@ -2074,7 +2075,10 @@ public void testStructuredPropertyIngestProposal() throws Exception { Mockito.when( mockSystemEntityClient.getLatestAspectObject( - any(OperationContext.class), eq(secondPropertyUrn), eq("propertyDefinition"))) + any(OperationContext.class), + eq(secondPropertyUrn), + eq("propertyDefinition"), + anyBoolean())) .thenReturn(new com.linkedin.entity.Aspect(secondDefinition.data())); // Get existing value for first structured property @@ -2209,7 +2213,7 @@ public void testBatchDuplicate() throws Exception { .recordTemplate(new Status().setRemoved(true)) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); ChangeItemImpl item2 = ChangeItemImpl.builder() .urn(entityUrn) @@ -2217,11 +2221,11 @@ public void testBatchDuplicate() throws Exception { .recordTemplate(new Status().setRemoved(false)) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(item1, item2)) .build(), false, @@ -2269,7 +2273,7 @@ public void testBatchPatchWithTrailingNoOp() throws Exception { .setTags(new TagAssociationArray(new TagAssociation().setTag(tag1)))) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); PatchItemImpl patchAdd2 = PatchItemImpl.builder() @@ -2311,7 +2315,7 @@ public void testBatchPatchWithTrailingNoOp() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(initialAspectTag1)) .build(), false, @@ -2320,7 +2324,7 @@ public void testBatchPatchWithTrailingNoOp() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(patchAdd2, patchRemoveNonExistent)) .build(), false, @@ -2368,7 +2372,7 @@ public void testBatchPatchAdd() throws Exception { .setTags(new TagAssociationArray(new TagAssociation().setTag(tag1)))) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + 
.build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); PatchItemImpl patchAdd3 = PatchItemImpl.builder() @@ -2428,7 +2432,7 @@ public void testBatchPatchAdd() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(initialAspectTag1)) .build(), false, @@ -2437,7 +2441,7 @@ public void testBatchPatchAdd() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(patchAdd3, patchAdd2, patchAdd1)) .build(), false, @@ -2491,7 +2495,7 @@ public void testBatchPatchAddDuplicate() throws Exception { .recordTemplate(new GlobalTags().setTags(new TagAssociationArray(initialTags))) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); PatchItemImpl patchAdd2 = PatchItemImpl.builder() @@ -2516,7 +2520,7 @@ public void testBatchPatchAddDuplicate() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(initialAspectTag1)) .build(), false, @@ -2525,7 +2529,7 @@ public void testBatchPatchAddDuplicate() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(patchAdd2, patchAdd2)) // duplicate .build(), false, @@ -2581,7 +2585,7 @@ public void testPatchRemoveNonExistent() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(patchRemove)) .build(), false, @@ -2638,7 +2642,7 @@ public void testPatchAddNonExistent() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(patchAdd)) .build(), false, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/cassandra/CassandraEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/cassandra/CassandraEntityServiceTest.java index 550f55e6bfd0b..b4fbfecc9d60d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/cassandra/CassandraEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/cassandra/CassandraEntityServiceTest.java @@ -10,11 +10,13 @@ import com.linkedin.metadata.AspectGenerationUtils; import com.linkedin.metadata.AspectIngestionUtils; import com.linkedin.metadata.CassandraTestUtils; +import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.EntityServiceAspectRetriever; import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.EntityServiceTest; import com.linkedin.metadata.entity.ListResult; +import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.key.CorpUserKey; import 
com.linkedin.metadata.models.registry.EntityRegistryException; @@ -93,12 +95,15 @@ private void configureComponents() { .entityService(_entityServiceImpl) .entityRegistry(_testEntityRegistry) .build()) - .graphRetriever(TestOperationContexts.emptyGraphRetriever) - .searchRetriever(TestOperationContexts.emptySearchRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever( + () -> _testEntityRegistry)) + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(SearchRetriever.EMPTY) .build(), null, opContext -> - ((EntityServiceAspectRetriever) opContext.getAspectRetrieverOpt().get()) + ((EntityServiceAspectRetriever) opContext.getAspectRetriever()) .setSystemOperationContext(opContext), null); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImplTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImplTest.java index 3f6b301e72aa5..0a867ae3c8f2e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImplTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImplTest.java @@ -26,7 +26,7 @@ public void testBatchDuplicate() throws Exception { .recordTemplate(new Status().setRemoved(true)) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); ChangeItemImpl item2 = ChangeItemImpl.builder() .urn(entityUrn) @@ -34,7 +34,7 @@ public void testBatchDuplicate() throws Exception { .recordTemplate(new Status().setRemoved(false)) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); assertFalse(item1.isDatabaseDuplicateOf(item2)); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/recommendation/RecommendationsServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/recommendation/RecommendationsServiceTest.java index ca42f0327c86d..8f68f119cb0b7 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/recommendation/RecommendationsServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/recommendation/RecommendationsServiceTest.java @@ -11,6 +11,7 @@ import com.linkedin.metadata.recommendation.ranker.SimpleRecommendationRanker; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; +import java.nio.file.AccessDeniedException; import java.util.List; import java.util.stream.Collectors; import org.testng.annotations.Test; @@ -74,7 +75,7 @@ private List getContentFromUrns(List urns) { } @Test - public void testService() throws URISyntaxException { + public void testService() throws URISyntaxException, AccessDeniedException { // Test non-eligible and empty RecommendationsService service = new RecommendationsService(ImmutableList.of(nonEligibleSource, emptySource), ranker); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/schemafields/sideeffects/SchemaFieldSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/schemafields/sideeffects/SchemaFieldSideEffectTest.java index 1661f5f02ee59..fa895cb454011 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/schemafields/sideeffects/SchemaFieldSideEffectTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/schemafields/sideeffects/SchemaFieldSideEffectTest.java @@ -21,7 +21,8 @@ import com.linkedin.data.ByteString; import com.linkedin.entity.Aspect; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.batch.MCLItem; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; @@ -46,7 +47,6 @@ import com.linkedin.test.metadata.aspect.TestEntityRegistry; import com.linkedin.test.metadata.aspect.batch.TestMCP; import io.datahubproject.metadata.context.RetrieverContext; -import io.datahubproject.test.metadata.context.TestOperationContexts; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -87,18 +87,18 @@ public class SchemaFieldSideEffectTest { .build())) .build(); - private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockAspectRetriever; private RetrieverContext retrieverContext; @BeforeMethod public void setup() { - mockAspectRetriever = mock(AspectRetriever.class); + mockAspectRetriever = mock(CachingAspectRetriever.class); when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_REGISTRY); retrieverContext = RetrieverContext.builder() .searchRetriever(mock(SearchRetriever.class)) - .aspectRetriever(mockAspectRetriever) - .graphRetriever(TestOperationContexts.emptyGraphRetriever) + .cachingAspectRetriever(mockAspectRetriever) + .graphRetriever(GraphRetriever.EMPTY) .build(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java index fd768424e13c1..1825b65a18ab1 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.aspect.models.graph.RelatedEntities; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration; +import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Condition; @@ -71,8 +72,10 @@ public void init() { () -> io.datahubproject.metadata.context.RetrieverContext.builder() .aspectRetriever(mockAspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever(() -> entityRegistry)) .graphRetriever(mockGraphRetriever) - .searchRetriever(TestOperationContexts.emptySearchRetriever) + .searchRetriever(SearchRetriever.EMPTY) .build(), null, null, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java index 8741e24b1bca5..de375271ed660 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java @@ -13,13 +13,14 @@ import static 
org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.models.graph.Edge; import com.linkedin.metadata.aspect.models.graph.RelatedEntities; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration; +import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Condition; @@ -54,7 +55,7 @@ public class DomainExpansionRewriterTest @BeforeMethod public void init() { EntityRegistry entityRegistry = new TestEntityRegistry(); - AspectRetriever mockAspectRetriever = mock(AspectRetriever.class); + CachingAspectRetriever mockAspectRetriever = mock(CachingAspectRetriever.class); when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); mockGraphRetriever = spy(GraphRetriever.class); @@ -71,8 +72,10 @@ public void init() { () -> io.datahubproject.metadata.context.RetrieverContext.builder() .aspectRetriever(mockAspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever(() -> entityRegistry)) .graphRetriever(mockGraphRetriever) - .searchRetriever(TestOperationContexts.emptySearchRetriever) + .searchRetriever(SearchRetriever.EMPTY) .build(), null, null, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java index c68997e25bcff..d6f5f9c3eedbe 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java @@ -18,6 +18,7 @@ import com.linkedin.data.template.StringArray; import com.linkedin.entity.Aspect; import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; @@ -49,8 +50,8 @@ public class AggregationQueryBuilderTest { - private static AspectRetriever aspectRetriever; - private static AspectRetriever aspectRetrieverV1; + private static CachingAspectRetriever aspectRetriever; + private static CachingAspectRetriever aspectRetrieverV1; private static String DEFAULT_FILTER = "_index"; @BeforeClass @@ -61,7 +62,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { Urn.createFromString("urn:li:structuredProperty:under.scores.and.dots_make_a_mess"); // legacy - aspectRetriever = mock(AspectRetriever.class); + aspectRetriever = mock(CachingAspectRetriever.class); when(aspectRetriever.getEntityRegistry()) .thenReturn(TestOperationContexts.defaultEntityRegistry()); @@ -106,7 +107,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { new Aspect(structPropUnderscoresAndDotsDefinition.data())))); // V1 - aspectRetrieverV1 = mock(AspectRetriever.class); + aspectRetrieverV1 = mock(CachingAspectRetriever.class); when(aspectRetrieverV1.getEntityRegistry()) 
.thenReturn(TestOperationContexts.defaultEntityRegistry()); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 393ca3ca5d4a6..e51511699e345 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -662,6 +662,7 @@ public void testInvalidStructuredProperty() { TestOperationContexts.systemContextNoSearchAuthorization( RetrieverContext.builder() .aspectRetriever(aspectRetriever) + .cachingAspectRetriever(TestOperationContexts.emptyActiveUsersAspectRetriever(null)) .graphRetriever(mock(GraphRetriever.class)) .searchRetriever(mock(SearchRetriever.class)) .build()); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java index 2c5bcd1294fa1..65b73b7425b74 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java @@ -247,6 +247,9 @@ public void testSetSearchableRefValue() throws URISyntaxException, RemoteInvocat TestOperationContexts.systemContextNoSearchAuthorization( RetrieverContext.builder() .aspectRetriever(aspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever( + () -> TEST_ENTITY_REGISTRY)) .graphRetriever(mock(GraphRetriever.class)) .searchRetriever(mock(SearchRetriever.class)) .build()); @@ -301,6 +304,9 @@ public void testSetSearchableRefValue_RuntimeException() TestOperationContexts.systemContextNoSearchAuthorization( RetrieverContext.builder() .aspectRetriever(aspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever( + () -> TEST_ENTITY_REGISTRY)) .graphRetriever(mock(GraphRetriever.class)) .searchRetriever(mock(SearchRetriever.class)) .build()); @@ -337,6 +343,9 @@ public void testSetSearchableRefValue_RuntimeException_URNExist() TestOperationContexts.systemContextNoSearchAuthorization( RetrieverContext.builder() .aspectRetriever(aspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever( + () -> TEST_ENTITY_REGISTRY)) .graphRetriever(mock(GraphRetriever.class)) .searchRetriever(mock(SearchRetriever.class)) .build()); @@ -369,6 +378,9 @@ void testSetSearchableRefValue_WithInvalidURN() TestOperationContexts.systemContextNoSearchAuthorization( RetrieverContext.builder() .aspectRetriever(aspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever( + () -> TEST_ENTITY_REGISTRY)) .graphRetriever(mock(GraphRetriever.class)) .searchRetriever(mock(SearchRetriever.class)) .build()); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java index b1b716c560481..9a0a82c7f9f49 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java @@ -18,7 +18,8 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.entity.Aspect; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.batch.PatchMCP; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; @@ -36,7 +37,6 @@ import com.linkedin.test.metadata.aspect.TestEntityRegistry; import com.linkedin.test.metadata.aspect.batch.TestMCL; import io.datahubproject.metadata.context.RetrieverContext; -import io.datahubproject.test.metadata.context.TestOperationContexts; import jakarta.json.Json; import jakarta.json.JsonPatch; import java.util.List; @@ -76,13 +76,13 @@ public class PropertyDefinitionDeleteSideEffectTest { private static final Urn TEST_DATASET_URN = UrnUtils.getUrn( "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.customers,PROD)"); - private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockAspectRetriever; private SearchRetriever mockSearchRetriever; private RetrieverContext retrieverContext; @BeforeMethod public void setup() { - mockAspectRetriever = mock(AspectRetriever.class); + mockAspectRetriever = mock(CachingAspectRetriever.class); when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_REGISTRY); when(mockAspectRetriever.getLatestAspectObject( eq(TEST_PROPERTY_URN), eq(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME))) @@ -101,8 +101,8 @@ public void setup() { retrieverContext = RetrieverContext.builder() .searchRetriever(mockSearchRetriever) - .aspectRetriever(mockAspectRetriever) - .graphRetriever(TestOperationContexts.emptyGraphRetriever) + .cachingAspectRetriever(mockAspectRetriever) + .graphRetriever(GraphRetriever.EMPTY) .build(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java index 2503faa00f6e7..6e8886f495c95 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java @@ -58,7 +58,7 @@ public void setup() { mockGraphRetriever = Mockito.mock(GraphRetriever.class); retrieverContext = io.datahubproject.metadata.context.RetrieverContext.builder() - .aspectRetriever(mockAspectRetriever) + .cachingAspectRetriever(mockAspectRetriever) .searchRetriever(mockSearchRetriever) .graphRetriever(mockGraphRetriever) .build(); diff --git a/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java index 3acd2bf341357..02cd28eb202e9 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java +++ b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java @@ -171,10 +171,7 @@ public Stream> generateMCPs( DefaultAspectsUtil.getAdditionalChanges( opContext, AspectsBatchImpl.builder() - .mcps( - List.of(mcp), - auditStamp, - opContext.getRetrieverContext().get()) + .mcps(List.of(mcp), auditStamp, opContext.getRetrieverContext()) .build() 
.getMCPItems(), entityService, diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java index cf9d73dfa729b..f16c9dbd82e74 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java @@ -20,7 +20,6 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.OperationContextConfig; -import io.datahubproject.metadata.context.RetrieverContext; import io.datahubproject.metadata.context.ServicesRegistryContext; import io.datahubproject.metadata.context.ValidationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; @@ -95,7 +94,7 @@ public OperationContext operationContext( entityRegistry, mock(ServicesRegistryContext.class), indexConvention, - mock(RetrieverContext.class), + TestOperationContexts.emptyActiveUsersRetrieverContext(() -> entityRegistry), mock(ValidationContext.class)); } diff --git a/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java b/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java index 47740b02d6166..65ee6b8591f48 100644 --- a/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java +++ b/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java @@ -93,8 +93,6 @@ public void testMCLOnBusinessAttributeUpdate() throws Exception { new RelatedEntity(BUSINESS_ATTRIBUTE_OF, SCHEMA_FIELD_URN.toString()))); when(opContext - .getRetrieverContext() - .get() .getAspectRetriever() .getLatestAspectObjects( eq(Set.of(SCHEMA_FIELD_URN)), eq(Set.of(BUSINESS_ATTRIBUTE_ASPECT)))) @@ -108,7 +106,7 @@ public void testMCLOnBusinessAttributeUpdate() throws Exception { // verify // page 1 - Mockito.verify(opContext.getRetrieverContext().get().getGraphRetriever(), Mockito.times(1)) + Mockito.verify(opContext.getRetrieverContext().getGraphRetriever(), Mockito.times(1)) .scrollRelatedEntities( isNull(), any(Filter.class), @@ -122,7 +120,7 @@ public void testMCLOnBusinessAttributeUpdate() throws Exception { isNull(), isNull()); // page 2 - Mockito.verify(opContext.getRetrieverContext().get().getGraphRetriever(), Mockito.times(1)) + Mockito.verify(opContext.getRetrieverContext().getGraphRetriever(), Mockito.times(1)) .scrollRelatedEntities( isNull(), any(Filter.class), @@ -136,7 +134,7 @@ public void testMCLOnBusinessAttributeUpdate() throws Exception { isNull(), isNull()); - Mockito.verifyNoMoreInteractions(opContext.getRetrieverContext().get().getGraphRetriever()); + Mockito.verifyNoMoreInteractions(opContext.getRetrieverContext().getGraphRetriever()); // 2 pages = 2 ingest proposals Mockito.verify(mockUpdateIndicesService, Mockito.times(2)) @@ -152,8 +150,8 @@ private void testMCLOnInvalidCategory() throws Exception { businessAttributeServiceHook.handleChangeEvent(opContext, platformEvent); // verify - Mockito.verifyNoInteractions(opContext.getRetrieverContext().get().getGraphRetriever()); - Mockito.verifyNoInteractions(opContext.getAspectRetrieverOpt().get()); + 
Mockito.verifyNoInteractions(opContext.getRetrieverContext().getGraphRetriever()); + Mockito.verifyNoInteractions(opContext.getAspectRetriever()); Mockito.verifyNoInteractions(mockUpdateIndicesService); } @@ -226,13 +224,15 @@ private OperationContext mockOperationContextWithGraph(List graph RetrieverContext mockRetrieverContext = mock(RetrieverContext.class); when(mockRetrieverContext.getAspectRetriever()).thenReturn(mock(AspectRetriever.class)); + when(mockRetrieverContext.getCachingAspectRetriever()) + .thenReturn(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); when(mockRetrieverContext.getGraphRetriever()).thenReturn(graphRetriever); OperationContext opContext = TestOperationContexts.systemContextNoSearchAuthorization(mockRetrieverContext); // reset mock for test - reset(opContext.getAspectRetrieverOpt().get()); + reset(opContext.getAspectRetriever()); if (!graphEdges.isEmpty()) { diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java index e65bf22991736..c08b7fad4dee3 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java @@ -1,12 +1,23 @@ package io.datahubproject.metadata.context; +import static com.linkedin.metadata.Constants.CORP_USER_KEY_ASPECT_NAME; +import static com.linkedin.metadata.Constants.CORP_USER_STATUS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.CORP_USER_STATUS_SUSPENDED; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.SYSTEM_ACTOR; + import com.datahub.authentication.Authentication; +import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.entity.Aspect; +import com.linkedin.identity.CorpUserStatus; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.policy.DataHubPolicyInfo; import java.util.Collection; import java.util.Collections; +import java.util.Map; import java.util.Optional; import java.util.Set; import lombok.Builder; @@ -48,6 +59,43 @@ public Urn getActorUrn() { return UrnUtils.getUrn(authentication.getActor().toUrnStr()); } + /** + * Actor is considered active if the user is not hard-deleted, soft-deleted, and is not suspended + * + * @param aspectRetriever aspect retriever - ideally the SystemEntityClient backed one for caching + * @return active status + */ + public boolean isActive(AspectRetriever aspectRetriever) { + // system cannot be disabled + if (SYSTEM_ACTOR.equals(authentication.getActor().toUrnStr())) { + return true; + } + + Urn selfUrn = UrnUtils.getUrn(authentication.getActor().toUrnStr()); + Map> urnAspectMap = + aspectRetriever.getLatestAspectObjects( + Set.of(selfUrn), + Set.of(STATUS_ASPECT_NAME, CORP_USER_STATUS_ASPECT_NAME, CORP_USER_KEY_ASPECT_NAME)); + + Map aspectMap = urnAspectMap.getOrDefault(selfUrn, Map.of()); + + if (!aspectMap.containsKey(CORP_USER_KEY_ASPECT_NAME)) { + // user is hard deleted + return false; + } + + Status status = + Optional.ofNullable(aspectMap.get(STATUS_ASPECT_NAME)) + .map(a -> new Status(a.data())) + .orElse(new Status().setRemoved(false)); + CorpUserStatus corpUserStatus = + Optional.ofNullable(aspectMap.get(CORP_USER_STATUS_ASPECT_NAME)) + .map(a -> new 
CorpUserStatus(a.data())) + .orElse(new CorpUserStatus().setStatus("")); + + return !status.isRemoved() && !CORP_USER_STATUS_SUSPENDED.equals(corpUserStatus.getStatus()); + } + /** * The current implementation creates a cache entry unique for the set of policies. * diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java index 9a058c526647c..9158129235b39 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java @@ -16,6 +16,8 @@ import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.utils.AuditStampUtils; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import io.datahubproject.metadata.exception.ActorAccessException; +import io.datahubproject.metadata.exception.OperationContextException; import java.util.Collection; import java.util.Objects; import java.util.Optional; @@ -63,6 +65,24 @@ public static OperationContext asSession( @Nonnull Authorizer authorizer, @Nonnull Authentication sessionAuthentication, boolean allowSystemAuthentication) { + return OperationContext.asSession( + systemOperationContext, + requestContext, + authorizer, + sessionAuthentication, + allowSystemAuthentication, + false); + } + + @Nonnull + public static OperationContext asSession( + OperationContext systemOperationContext, + @Nonnull RequestContext requestContext, + @Nonnull Authorizer authorizer, + @Nonnull Authentication sessionAuthentication, + boolean allowSystemAuthentication, + boolean skipCache) + throws ActorAccessException { return systemOperationContext.toBuilder() .operationContextConfig( // update allowed system authentication @@ -72,7 +92,7 @@ public static OperationContext asSession( .authorizationContext(AuthorizationContext.builder().authorizer(authorizer).build()) .requestContext(requestContext) .validationContext(systemOperationContext.getValidationContext()) - .build(sessionAuthentication); + .build(sessionAuthentication, skipCache); } /** @@ -85,10 +105,14 @@ public static OperationContext asSession( public static OperationContext withSearchFlags( OperationContext opContext, Function flagDefaults) { - return opContext.toBuilder() - // update search flags for the request's session - .searchContext(opContext.getSearchContext().withFlagDefaults(flagDefaults)) - .build(opContext.getSessionActorContext()); + try { + return opContext.toBuilder() + // update search flags for the request's session + .searchContext(opContext.getSearchContext().withFlagDefaults(flagDefaults)) + .build(opContext.getSessionActorContext(), false); + } catch (OperationContextException e) { + throw new RuntimeException(e); + } } /** @@ -101,10 +125,14 @@ public static OperationContext withSearchFlags( public static OperationContext withLineageFlags( OperationContext opContext, Function flagDefaults) { - return opContext.toBuilder() - // update lineage flags for the request's session - .searchContext(opContext.getSearchContext().withLineageFlagDefaults(flagDefaults)) - .build(opContext.getSessionActorContext()); + try { + return opContext.toBuilder() + // update lineage flags for the request's session + .searchContext(opContext.getSearchContext().withLineageFlagDefaults(flagDefaults)) + .build(opContext.getSessionActorContext(), false); + } catch (OperationContextException e) { + 
throw new RuntimeException(e); + } } /** @@ -155,18 +183,22 @@ public static OperationContext asSystem( ? SearchContext.EMPTY : SearchContext.builder().indexConvention(indexConvention).build(); - return OperationContext.builder() - .operationContextConfig(systemConfig) - .systemActorContext(systemActorContext) - .searchContext(systemSearchContext) - .entityRegistryContext(EntityRegistryContext.builder().build(entityRegistry)) - .servicesRegistryContext(servicesRegistryContext) - // Authorizer.EMPTY doesn't actually apply to system auth - .authorizationContext(AuthorizationContext.builder().authorizer(Authorizer.EMPTY).build()) - .retrieverContext(retrieverContext) - .objectMapperContext(objectMapperContext) - .validationContext(validationContext) - .build(systemAuthentication); + try { + return OperationContext.builder() + .operationContextConfig(systemConfig) + .systemActorContext(systemActorContext) + .searchContext(systemSearchContext) + .entityRegistryContext(EntityRegistryContext.builder().build(entityRegistry)) + .servicesRegistryContext(servicesRegistryContext) + // Authorizer.EMPTY doesn't actually apply to system auth + .authorizationContext(AuthorizationContext.builder().authorizer(Authorizer.EMPTY).build()) + .retrieverContext(retrieverContext) + .objectMapperContext(objectMapperContext) + .validationContext(validationContext) + .build(systemAuthentication, false); + } catch (OperationContextException e) { + throw new RuntimeException(e); + } } @Nonnull private final OperationContextConfig operationContextConfig; @@ -177,7 +209,7 @@ public static OperationContext asSystem( @Nonnull private final EntityRegistryContext entityRegistryContext; @Nullable private final ServicesRegistryContext servicesRegistryContext; @Nullable private final RequestContext requestContext; - @Nullable private final RetrieverContext retrieverContext; + @Nonnull private final RetrieverContext retrieverContext; @Nonnull private final ObjectMapperContext objectMapperContext; @Nonnull private final ValidationContext validationContext; @@ -194,13 +226,15 @@ public OperationContext withLineageFlags( public OperationContext asSession( @Nonnull RequestContext requestContext, @Nonnull Authorizer authorizer, - @Nonnull Authentication sessionAuthentication) { + @Nonnull Authentication sessionAuthentication) + throws ActorAccessException { return OperationContext.asSession( this, requestContext, authorizer, sessionAuthentication, - getOperationContextConfig().isAllowSystemAuthentication()); + getOperationContextConfig().isAllowSystemAuthentication(), + false); } @Nonnull @@ -284,17 +318,9 @@ public AuditStamp getAuditStamp() { return getAuditStamp(null); } - public Optional getRetrieverContext() { - return Optional.ofNullable(retrieverContext); - } - - @Nullable + @Nonnull public AspectRetriever getAspectRetriever() { - return getAspectRetrieverOpt().orElse(null); - } - - public Optional getAspectRetrieverOpt() { - return getRetrieverContext().map(RetrieverContext::getAspectRetriever); + return retrieverContext.getAspectRetriever(); } /** @@ -336,10 +362,7 @@ public String getGlobalContextId() { ? EmptyContext.EMPTY : getServicesRegistryContext()) .add(getRequestContext() == null ? EmptyContext.EMPTY : getRequestContext()) - .add( - getRetrieverContext().isPresent() - ? getRetrieverContext().get() - : EmptyContext.EMPTY) + .add(getRetrieverContext()) .add(getObjectMapperContext()) .build() .stream() @@ -364,10 +387,7 @@ public String getSearchContextId() { getServicesRegistryContext() == null ? 
EmptyContext.EMPTY : getServicesRegistryContext()) - .add( - getRetrieverContext().isPresent() - ? getRetrieverContext().get() - : EmptyContext.EMPTY) + .add(getRetrieverContext()) .build() .stream() .map(ContextInterface::getCacheKeyComponent) @@ -438,6 +458,12 @@ public static class OperationContextBuilder { @Nonnull public OperationContext build(@Nonnull Authentication sessionAuthentication) { + return build(sessionAuthentication, false); + } + + @Nonnull + public OperationContext build( + @Nonnull Authentication sessionAuthentication, boolean skipCache) { final Urn actorUrn = UrnUtils.getUrn(sessionAuthentication.getActor().toUrnStr()); final ActorContext sessionActor = ActorContext.builder() @@ -451,11 +477,20 @@ public OperationContext build(@Nonnull Authentication sessionAuthentication) { .policyInfoSet(this.authorizationContext.getAuthorizer().getActorPolicies(actorUrn)) .groupMembership(this.authorizationContext.getAuthorizer().getActorGroups(actorUrn)) .build(); - return build(sessionActor); + return build(sessionActor, skipCache); } @Nonnull - public OperationContext build(@Nonnull ActorContext sessionActor) { + public OperationContext build(@Nonnull ActorContext sessionActor, boolean skipCache) { + AspectRetriever retriever = + skipCache + ? this.retrieverContext.getAspectRetriever() + : this.retrieverContext.getCachingAspectRetriever(); + + if (!sessionActor.isActive(retriever)) { + throw new ActorAccessException("Actor is not active"); + } + return new OperationContext( this.operationContextConfig, sessionActor, diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java index 9337fbfe3bb00..9afc4138810bb 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java @@ -1,8 +1,10 @@ package io.datahubproject.metadata.context; import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.entity.SearchRetriever; +import java.util.Objects; import java.util.Optional; import javax.annotation.Nonnull; import lombok.Builder; @@ -15,10 +17,37 @@ public class RetrieverContext @Nonnull private final GraphRetriever graphRetriever; @Nonnull private final AspectRetriever aspectRetriever; + @Nonnull private final CachingAspectRetriever cachingAspectRetriever; @Nonnull private final SearchRetriever searchRetriever; @Override public Optional getCacheKeyComponent() { return Optional.empty(); } + + public static class RetrieverContextBuilder { + public RetrieverContext build() { + if (this.aspectRetriever == null && this.cachingAspectRetriever != null) { + this.aspectRetriever = this.cachingAspectRetriever; + } + + if (this.cachingAspectRetriever == null + && this.aspectRetriever instanceof CachingAspectRetriever) { + this.cachingAspectRetriever = (CachingAspectRetriever) this.aspectRetriever; + } + + return new RetrieverContext( + this.graphRetriever, + Objects.requireNonNull(this.aspectRetriever), + Objects.requireNonNull(this.cachingAspectRetriever), + this.searchRetriever); + } + } + + public static final RetrieverContext EMPTY = + RetrieverContext.builder() + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(SearchRetriever.EMPTY) + 
.cachingAspectRetriever(CachingAspectRetriever.EMPTY) + .build(); } diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/ActorAccessException.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/ActorAccessException.java new file mode 100644 index 0000000000000..bca2594b96430 --- /dev/null +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/ActorAccessException.java @@ -0,0 +1,7 @@ +package io.datahubproject.metadata.exception; + +public class ActorAccessException extends OperationContextException { + public ActorAccessException(String string) { + super(string); + } +} diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/OperationContextException.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/OperationContextException.java new file mode 100644 index 0000000000000..1aac8dc3e60ec --- /dev/null +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/OperationContextException.java @@ -0,0 +1,9 @@ +package io.datahubproject.metadata.exception; + +public class OperationContextException extends RuntimeException { + public OperationContextException(String message) { + super(message); + } + + public OperationContextException() {} +} diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java index 42de6b7398c61..4abfbb196f067 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java @@ -8,21 +8,17 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.entity.Aspect; +import com.linkedin.identity.CorpUserInfo; +import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; -import com.linkedin.metadata.aspect.SystemAspect; -import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistryException; import com.linkedin.metadata.models.registry.MergedEntityRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; -import com.linkedin.metadata.query.filter.Filter; -import com.linkedin.metadata.query.filter.RelationshipFilter; -import com.linkedin.metadata.query.filter.SortCriterion; -import com.linkedin.metadata.search.ScrollResult; -import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; @@ -32,15 +28,14 @@ import io.datahubproject.metadata.context.RetrieverContext; import io.datahubproject.metadata.context.ServicesRegistryContext; import io.datahubproject.metadata.context.ValidationContext; -import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; 
import java.util.function.Consumer; import java.util.function.Supplier; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import lombok.Builder; /** * Useful for testing. If the defaults are not sufficient, try using the .toBuilder() and replacing @@ -81,26 +76,53 @@ public static EntityRegistry defaultEntityRegistry() { return defaultEntityRegistryInstance; } - public static AspectRetriever emptyAspectRetriever( + public static RetrieverContext emptyActiveUsersRetrieverContext( @Nullable Supplier entityRegistrySupplier) { - return new EmptyAspectRetriever( - () -> - Optional.ofNullable(entityRegistrySupplier) - .map(Supplier::get) - .orElse(defaultEntityRegistry())); - } - public static GraphRetriever emptyGraphRetriever = new EmptyGraphRetriever(); - public static SearchRetriever emptySearchRetriever = new EmptySearchRetriever(); + return RetrieverContext.builder() + .cachingAspectRetriever(emptyActiveUsersAspectRetriever(entityRegistrySupplier)) + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(SearchRetriever.EMPTY) + .build(); + } - public static RetrieverContext emptyRetrieverContext( + public static CachingAspectRetriever emptyActiveUsersAspectRetriever( @Nullable Supplier entityRegistrySupplier) { - return RetrieverContext.builder() - .aspectRetriever(emptyAspectRetriever(entityRegistrySupplier)) - .graphRetriever(emptyGraphRetriever) - .searchRetriever(emptySearchRetriever) - .build(); + return new CachingAspectRetriever.EmptyAspectRetriever() { + + @Nonnull + @Override + public Map> getLatestAspectObjects( + Set urns, Set aspectNames) { + if (urns.stream().allMatch(urn -> urn.toString().startsWith("urn:li:corpuser:")) + && aspectNames.contains(Constants.CORP_USER_KEY_ASPECT_NAME)) { + return urns.stream() + .map( + urn -> + Map.entry( + urn, + Map.of( + Constants.CORP_USER_KEY_ASPECT_NAME, + new Aspect( + new CorpUserInfo() + .setActive(true) + .setEmail(urn.getId()) + .setDisplayName(urn.getId()) + .data())))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + return super.getLatestAspectObjects(urns, aspectNames); + } + + @Nonnull + @Override + public EntityRegistry getEntityRegistry() { + return Optional.ofNullable(entityRegistrySupplier) + .map(Supplier::get) + .orElse(defaultEntityRegistry()); + } + }; } public static OperationContext systemContextNoSearchAuthorization( @@ -140,8 +162,10 @@ public static OperationContext systemContextNoSearchAuthorization( RetrieverContext retrieverContext = RetrieverContext.builder() .aspectRetriever(aspectRetriever) - .graphRetriever(emptyGraphRetriever) - .searchRetriever(emptySearchRetriever) + .cachingAspectRetriever( + emptyActiveUsersAspectRetriever(() -> aspectRetriever.getEntityRegistry())) + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(SearchRetriever.EMPTY) .build(); return systemContextNoSearchAuthorization( () -> retrieverContext.getAspectRetriever().getEntityRegistry(), @@ -208,7 +232,7 @@ public static OperationContext systemContext( RetrieverContext retrieverContext = Optional.ofNullable(retrieverContextSupplier) .map(Supplier::get) - .orElse(emptyRetrieverContext(entityRegistrySupplier)); + .orElse(emptyActiveUsersRetrieverContext(entityRegistrySupplier)); EntityRegistry entityRegistry = Optional.ofNullable(entityRegistrySupplier) @@ -298,66 +322,5 @@ public static OperationContext userContextNoSearchAuthorization( .asSession(requestContext, Authorizer.EMPTY, TEST_USER_AUTH); } - @Builder - public static class 
EmptyAspectRetriever implements AspectRetriever { - private final Supplier entityRegistrySupplier; - - @Nonnull - @Override - public Map> getLatestAspectObjects( - Set urns, Set aspectNames) { - return Map.of(); - } - - @Nonnull - @Override - public Map> getLatestSystemAspects( - Map> urnAspectNames) { - return Map.of(); - } - - @Nonnull - @Override - public EntityRegistry getEntityRegistry() { - return entityRegistrySupplier.get(); - } - } - - public static class EmptyGraphRetriever implements GraphRetriever { - - @Nonnull - @Override - public RelatedEntitiesScrollResult scrollRelatedEntities( - @Nullable List sourceTypes, - @Nonnull Filter sourceEntityFilter, - @Nullable List destinationTypes, - @Nonnull Filter destinationEntityFilter, - @Nonnull List relationshipTypes, - @Nonnull RelationshipFilter relationshipFilter, - @Nonnull List sortCriterion, - @Nullable String scrollId, - int count, - @Nullable Long startTimeMillis, - @Nullable Long endTimeMillis) { - return new RelatedEntitiesScrollResult(0, 0, null, List.of()); - } - } - - public static class EmptySearchRetriever implements SearchRetriever { - - @Override - public ScrollResult scroll( - @Nonnull List entities, - @Nullable Filter filters, - @Nullable String scrollId, - int count) { - ScrollResult empty = new ScrollResult(); - empty.setEntities(new SearchEntityArray()); - empty.setNumEntities(0); - empty.setPageSize(0); - return empty; - } - } - private TestOperationContexts() {} } diff --git a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java index 3e092e20127ee..f77b244d8f2d8 100644 --- a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java +++ b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java @@ -8,6 +8,7 @@ import com.datahub.authentication.Authentication; import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.metadata.models.registry.EntityRegistry; +import io.datahubproject.test.metadata.context.TestOperationContexts; import org.testng.annotations.Test; public class OperationContextTest { @@ -25,7 +26,7 @@ public void testSystemPrivilegeEscalation() { mock(EntityRegistry.class), mock(ServicesRegistryContext.class), null, - mock(RetrieverContext.class), + TestOperationContexts.emptyActiveUsersRetrieverContext(null), mock(ValidationContext.class)); OperationContext opContext = diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java index 6724f35d840ad..a9871f1ed9948 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java @@ -145,7 +145,7 @@ public String generateAccessToken( _entityService.ingestProposal( systemOperationContext, AspectsBatchImpl.builder() - .mcps(List.of(proposal), auditStamp, systemOperationContext.getRetrieverContext().get()) + .mcps(List.of(proposal), auditStamp, systemOperationContext.getRetrieverContext()) .build(), false); diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 9348416606d0a..75b4c8e8b002f 100644 
--- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -522,12 +522,12 @@ cache: entityAspectTTLSeconds: # cache user aspects for 20s corpuser: - corpUserKey: 20 + corpUserKey: 300 # 5 min corpUserInfo: 20 corpUserEditableInfo: 20 - corpUserStatus: 20 + corpUserStatus: 300 # 5 min globalTags: 20 - status: 20 + status: 300 # 5 min corpUserCredentials: 20 corpUserSettings: 20 roleMembership: 20 diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java index f5235dc3682fc..3e2823591e168 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java @@ -45,7 +45,8 @@ protected OperationContext javaSystemOperationContext( @Nonnull final SearchService searchService, @Qualifier("baseElasticSearchComponents") BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components, - @Nonnull final ConfigurationProvider configurationProvider) { + @Nonnull final ConfigurationProvider configurationProvider, + @Qualifier("systemEntityClient") @Nonnull final SystemEntityClient systemEntityClient) { EntityServiceAspectRetriever entityServiceAspectRetriever = EntityServiceAspectRetriever.builder() @@ -53,6 +54,9 @@ protected OperationContext javaSystemOperationContext( .entityService(entityService) .build(); + EntityClientAspectRetriever entityClientAspectRetriever = + EntityClientAspectRetriever.builder().entityClient(systemEntityClient).build(); + SystemGraphRetriever systemGraphRetriever = SystemGraphRetriever.builder().graphService(graphService).build(); @@ -68,6 +72,7 @@ protected OperationContext javaSystemOperationContext( components.getIndexConvention(), RetrieverContext.builder() .aspectRetriever(entityServiceAspectRetriever) + .cachingAspectRetriever(entityClientAspectRetriever) .graphRetriever(systemGraphRetriever) .searchRetriever(searchServiceSearchRetriever) .build(), @@ -76,6 +81,7 @@ protected OperationContext javaSystemOperationContext( configurationProvider.getFeatureFlags().isAlternateMCPValidation()) .build()); + entityClientAspectRetriever.setSystemOperationContext(systemOperationContext); entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); systemGraphRetriever.setSystemOperationContext(systemOperationContext); searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext); @@ -104,7 +110,7 @@ protected OperationContext restliSystemOperationContext( BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components, @Nonnull final ConfigurationProvider configurationProvider) { - EntityClientAspectRetriever entityServiceAspectRetriever = + EntityClientAspectRetriever entityClientAspectRetriever = EntityClientAspectRetriever.builder().entityClient(systemEntityClient).build(); SystemGraphRetriever systemGraphRetriever = @@ -121,7 +127,7 @@ protected OperationContext restliSystemOperationContext( ServicesRegistryContext.builder().restrictedService(restrictedService).build(), components.getIndexConvention(), RetrieverContext.builder() - .aspectRetriever(entityServiceAspectRetriever) + .cachingAspectRetriever(entityClientAspectRetriever) .graphRetriever(systemGraphRetriever) 
.searchRetriever(searchServiceSearchRetriever) .build(), @@ -130,7 +136,7 @@ protected OperationContext restliSystemOperationContext( configurationProvider.getFeatureFlags().isAlternateMCPValidation()) .build()); - entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); + entityClientAspectRetriever.setSystemOperationContext(systemOperationContext); systemGraphRetriever.setSystemOperationContext(systemOperationContext); searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext); diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java index 22ce06a5984ea..c04dd25ccd4ac 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java @@ -84,14 +84,14 @@ public void execute(@Nonnull OperationContext systemOperationContext) throws Exc .aspectName(DATA_PLATFORM_INSTANCE_ASPECT_NAME) .recordTemplate(dataPlatformInstance.get()) .auditStamp(aspectAuditStamp) - .build(systemOperationContext.getAspectRetrieverOpt().get())); + .build(systemOperationContext.getAspectRetriever())); } } _entityService.ingestAspects( systemOperationContext, AspectsBatchImpl.builder() - .retrieverContext(systemOperationContext.getRetrieverContext().get()) + .retrieverContext(systemOperationContext.getRetrieverContext()) .items(items) .build(), true, diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java index eb6bfe17ac198..dac2879487469 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java @@ -225,7 +225,7 @@ private void ingestPolicy( new AuditStamp() .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) .setTime(System.currentTimeMillis()), - systemOperationContext.getRetrieverContext().get()) + systemOperationContext.getRetrieverContext()) .build(), false); } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java similarity index 81% rename from metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java rename to metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java index ba0a426fa20e8..c756827cad56b 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java @@ -1,9 +1,11 @@ -package io.datahubproject.openapi; +package io.datahubproject.openapi.config; import com.linkedin.metadata.dao.throttle.APIThrottleException; +import io.datahubproject.metadata.exception.ActorAccessException; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.openapi.exception.UnauthorizedException; import 
java.util.Map; +import javax.annotation.PostConstruct; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.ConversionNotSupportedException; import org.springframework.core.Ordered; @@ -19,6 +21,11 @@ @ControllerAdvice public class GlobalControllerExceptionHandler extends DefaultHandlerExceptionResolver { + @PostConstruct + public void init() { + log.info("GlobalControllerExceptionHandler initialized"); + } + public GlobalControllerExceptionHandler() { setOrder(Ordered.HIGHEST_PRECEDENCE); setWarnLogCategory(getClass().getName()); @@ -52,4 +59,9 @@ public static ResponseEntity> handleUnauthorizedException( UnauthorizedException e) { return new ResponseEntity<>(Map.of("error", e.getMessage()), HttpStatus.FORBIDDEN); } + + @ExceptionHandler(ActorAccessException.class) + public static ResponseEntity> actorAccessException(ActorAccessException e) { + return new ResponseEntity<>(Map.of("error", e.getMessage()), HttpStatus.FORBIDDEN); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index 579a62c084999..592d7bba4211f 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -637,7 +637,7 @@ public ResponseEntity createAspect( AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName).get(); ChangeMCP upsert = toUpsertItem( - opContext.getRetrieverContext().get().getAspectRetriever(), + opContext.getRetrieverContext().getAspectRetriever(), urn, aspectSpec, createIfEntityNotExists, @@ -649,7 +649,7 @@ public ResponseEntity createAspect( entityService.ingestProposal( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(upsert)) .build(), async); @@ -725,7 +725,7 @@ public ResponseEntity patchAspect( .build(); ChangeMCP upsert = toUpsertItem( - opContext.getRetrieverContext().get().getAspectRetriever(), + opContext.getRetrieverContext().getAspectRetriever(), validatedUrn(entityUrn), aspectSpec, currentValue, @@ -736,7 +736,7 @@ public ResponseEntity patchAspect( entityService.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(upsert)) .build(), true, diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/test/IdController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/test/IdController.java new file mode 100644 index 0000000000000..99d3879ab9a32 --- /dev/null +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/test/IdController.java @@ -0,0 +1,54 @@ +package io.datahubproject.openapi.operations.test; + +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthorizerChain; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import jakarta.servlet.http.HttpServletRequest; +import 
java.util.List; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequestMapping("/operations/identity") +@Slf4j +@Tag(name = "Identity", description = "An API for checking identity") +public class IdController { + private final AuthorizerChain authorizerChain; + private final OperationContext systemOperationContext; + + public IdController(OperationContext systemOperationContext, AuthorizerChain authorizerChain) { + this.systemOperationContext = systemOperationContext; + this.authorizerChain = authorizerChain; + } + + @Tag(name = "User") + @GetMapping(path = "/user/urn", produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "User id") + public ResponseEntity> getUserId( + HttpServletRequest request, + @RequestParam(value = "skipCache", required = false, defaultValue = "false") + Boolean skipCache) { + Authentication authentication = AuthenticationContext.getAuthentication(); + String actorUrnStr = authentication.getActor().toUrnStr(); + + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi(actorUrnStr, request, "getUserIdentity", List.of()), + authorizerChain, + authentication, + true, + skipCache); + + return ResponseEntity.ok(Map.of("urn", actorUrnStr)); + } +} diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java index c38f2db0eefbb..ca425810c87a0 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java @@ -491,7 +491,7 @@ public static List> ingestBatchProposal( try { AspectsBatch batch = AspectsBatchImpl.builder() - .mcps(serviceProposals, auditStamp, opContext.getRetrieverContext().get()) + .mcps(serviceProposals, auditStamp, opContext.getRetrieverContext()) .build(); Map> resultMap = diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index 56a7955b9fe87..b1c5709ef0147 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -203,7 +203,7 @@ protected AspectsBatch toMCPBatch( objectMapper.writeValueAsString(aspect.getValue().get("systemMetadata")))); } - items.add(builder.build(opContext.getAspectRetrieverOpt().get())); + items.add(builder.build(opContext.getAspectRetriever())); } } } @@ -211,7 +211,7 @@ protected AspectsBatch toMCPBatch( return AspectsBatchImpl.builder() .items(items) - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .build(); } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java 
b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index ce7fd73f99b9e..af13cd3aab051 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -554,14 +554,14 @@ protected AspectsBatch toMCPBatch( GenericRecordUtils.JSON, aspectSpec)); - items.add(builder.build(opContext.getRetrieverContext().get().getAspectRetriever())); + items.add(builder.build(opContext.getRetrieverContext().getAspectRetriever())); } } } } return AspectsBatchImpl.builder() .items(items) - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .build(); } diff --git a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entitiesV2.restspec.json b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entitiesV2.restspec.json index 33cfba0f27802..27731af9ffaa7 100644 --- a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entitiesV2.restspec.json +++ b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entitiesV2.restspec.json @@ -19,6 +19,10 @@ "name" : "aspects", "type" : "{ \"type\" : \"array\", \"items\" : \"string\" }", "optional" : true + }, { + "name" : "alwaysIncludeKeyAspect", + "type" : "boolean", + "optional" : true } ] }, { "method" : "batch_get", @@ -27,6 +31,10 @@ "name" : "aspects", "type" : "{ \"type\" : \"array\", \"items\" : \"string\" }", "optional" : true + }, { + "name" : "alwaysIncludeKeyAspect", + "type" : "boolean", + "optional" : true } ] } ], "entity" : { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json index 9bf7f97b34be1..9c5f41281fcfb 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json @@ -182,6 +182,10 @@ "name" : "aspects", "type" : "{ \"type\" : \"array\", \"items\" : \"string\" }", "optional" : true + }, { + "name" : "alwaysIncludeKeyAspect", + "type" : "boolean", + "optional" : true } ] }, { "method" : "batch_get", @@ -190,6 +194,10 @@ "name" : "aspects", "type" : "{ \"type\" : \"array\", \"items\" : \"string\" }", "optional" : true + }, { + "name" : "alwaysIncludeKeyAspect", + "type" : "boolean", + "optional" : true } ] } ], "entity" : { diff --git a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java index cf6e571cb8cbe..b85f22e781d0b 100644 --- a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java +++ b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java @@ -45,12 +45,34 @@ // Consider renaming this to datahub client. 
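The restspec and snapshot additions above expose alwaysIncludeKeyAspect as an optional query parameter on the entitiesV2 resource, so the new behavior is also reachable over plain HTTP. A hedged sketch of such a call; the base URL, bearer token, and the Rest.li 2.0 List(...) array encoding are assumptions rather than part of this change:

from urllib.parse import quote

import requests

GMS = "http://localhost:8080"  # assumed local GMS address
TOKEN = "<personal-access-token>"  # assumed; any valid bearer token

urn = "urn:li:corpuser:sessionUser"
response = requests.get(
    f"{GMS}/entitiesV2/{quote(urn, safe='')}",
    headers={
        "Authorization": f"Bearer {TOKEN}",
        "X-RestLi-Protocol-Version": "2.0.0",
    },
    params={
        "aspects": "List(corpUserInfo,corpUserStatus)",
        # New flag: when false, key aspects are only returned if they are
        # actually stored, instead of being synthesized for every urn.
        "alwaysIncludeKeyAspect": "false",
    },
)
response.raise_for_status()
print(response.json())
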
public interface EntityClient { + /** + * This version follows the legacy behavior of returning key aspects regardless of whether they + * exist + * + * @param opContext operation context + * @param entityName entity type + * @param urn urn id for the entity + * @param aspectNames set of aspects + * @return requested entity/aspects + */ + @Deprecated @Nullable - EntityResponse getV2( + default EntityResponse getV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Urn urn, @Nullable final Set aspectNames) + throws RemoteInvocationException, URISyntaxException { + return getV2(opContext, entityName, urn, aspectNames, true); + } + + @Nullable + EntityResponse getV2( + @Nonnull OperationContext opContext, + @Nonnull String entityName, + @Nonnull final Urn urn, + @Nullable final Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException; @Nonnull @@ -58,12 +80,34 @@ EntityResponse getV2( Entity get(@Nonnull OperationContext opContext, @Nonnull final Urn urn) throws RemoteInvocationException; + /** + * This version follows the legacy behavior of returning key aspects regardless of whether they + * exist + * + * @param opContext operation context + * @param entityName entity type + * @param urns urn ids for the entities + * @param aspectNames set of aspects + * @return requested entity/aspects + */ + @Deprecated @Nonnull - Map batchGetV2( + default Map batchGetV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Set urns, @Nullable final Set aspectNames) + throws RemoteInvocationException, URISyntaxException { + return batchGetV2(opContext, entityName, urns, aspectNames, true); + } + + @Nonnull + Map batchGetV2( + @Nonnull OperationContext opContext, + @Nonnull String entityName, + @Nonnull final Set urns, + @Nullable final Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException; @Nonnull @@ -589,27 +633,38 @@ void rollbackIngestion( @Nullable default Aspect getLatestAspectObject( - @Nonnull OperationContext opContext, @Nonnull Urn urn, @Nonnull String aspectName) + @Nonnull OperationContext opContext, + @Nonnull Urn urn, + @Nonnull String aspectName, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { - return getLatestAspects(opContext, Set.of(urn), Set.of(aspectName)) + return getLatestAspects(opContext, Set.of(urn), Set.of(aspectName), alwaysIncludeKeyAspect) .getOrDefault(urn, Map.of()) .get(aspectName); } @Nonnull default Map> getLatestAspects( - @Nonnull OperationContext opContext, @Nonnull Set urns, @Nonnull Set aspectNames) + @Nonnull OperationContext opContext, + @Nonnull Set urns, + @Nonnull Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { String entityName = urns.stream().findFirst().map(Urn::getEntityType).get(); - return entityResponseToAspectMap(batchGetV2(opContext, entityName, urns, aspectNames)); + return entityResponseToAspectMap( + batchGetV2(opContext, entityName, urns, aspectNames, alwaysIncludeKeyAspect)); } @Nonnull default Map> getLatestSystemAspect( - @Nonnull OperationContext opContext, @Nonnull Set urns, @Nonnull Set aspectNames) + @Nonnull OperationContext opContext, + @Nonnull Set urns, + @Nonnull Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { String entityName = 
urns.stream().findFirst().map(Urn::getEntityType).get(); return entityResponseToSystemAspectMap( - batchGetV2(opContext, entityName, urns, aspectNames), opContext.getEntityRegistry()); + batchGetV2(opContext, entityName, urns, aspectNames, alwaysIncludeKeyAspect), + opContext.getEntityRegistry()); } } diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index 516902601f08a..8d4c5e9228a71 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -156,10 +156,15 @@ public EntityResponse getV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Urn urn, - @Nullable final Set aspectNames) + @Nullable final Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { final EntitiesV2GetRequestBuilder requestBuilder = - ENTITIES_V2_REQUEST_BUILDERS.get().aspectsParam(aspectNames).id(urn.toString()); + ENTITIES_V2_REQUEST_BUILDERS + .get() + .aspectsParam(aspectNames) + .id(urn.toString()) + .alwaysIncludeKeyAspectParam(alwaysIncludeKeyAspect); return sendClientRequest(requestBuilder, opContext.getSessionAuthentication()).getEntity(); } @@ -241,7 +246,8 @@ public Map batchGetV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Set urns, - @Nullable final Set aspectNames) + @Nullable final Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { Map responseMap = new HashMap<>(); @@ -260,6 +266,7 @@ public Map batchGetV2( ENTITIES_V2_REQUEST_BUILDERS .batchGet() .aspectsParam(aspectNames) + .alwaysIncludeKeyAspectParam(alwaysIncludeKeyAspect) .ids( batch.stream() .map(Urn::toString) diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java index 2637e2d067c6d..aa17f1951bc91 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java @@ -59,6 +59,6 @@ public Map batchGetV2NoCache( @Nonnull Set urns, @Nullable Set aspectNames) throws RemoteInvocationException, URISyntaxException { - return super.batchGetV2(opContext, entityName, urns, aspectNames); + return super.batchGetV2(opContext, entityName, urns, aspectNames, false); } } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 6033ead36f10e..30b187da00e91 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -309,7 +309,7 @@ private Task ingestProposals( log.debug("Proposals: {}", metadataChangeProposals); try { final AspectsBatch batch = AspectsBatchImpl.builder() - .mcps(metadataChangeProposals, auditStamp, opContext.getRetrieverContext().get(), + .mcps(metadataChangeProposals, 
auditStamp, opContext.getRetrieverContext(), opContext.getValidationContext().isAlternateValidation()) .build(); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java index 20209ddf44d64..896d81d3cbecc 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java @@ -64,7 +64,8 @@ public class EntityV2Resource extends CollectionResourceTaskTemplate get( - @Nonnull String urnStr, @QueryParam(PARAM_ASPECTS) @Optional @Nullable String[] aspectNames) + @Nonnull String urnStr, @QueryParam(PARAM_ASPECTS) @Optional @Nullable String[] aspectNames, + @QueryParam(PARAM_ALWAYS_INCLUDE_KEY_ASPECT) @Optional @Nullable Boolean alwaysIncludeKeyAspect) throws URISyntaxException { log.debug("GET V2 {}", urnStr); final Urn urn = Urn.createFromString(urnStr); @@ -90,7 +91,7 @@ public Task get( ? opContext.getEntityAspectNames(entityName) : new HashSet<>(Arrays.asList(aspectNames)); try { - return _entityService.getEntityV2(opContext, entityName, urn, projectedAspects); + return _entityService.getEntityV2(opContext, entityName, urn, projectedAspects, alwaysIncludeKeyAspect == null || alwaysIncludeKeyAspect); } catch (Exception e) { throw new RuntimeException( String.format( @@ -106,7 +107,8 @@ public Task get( @WithSpan public Task> batchGet( @Nonnull Set urnStrs, - @QueryParam(PARAM_ASPECTS) @Optional @Nullable String[] aspectNames) + @QueryParam(PARAM_ASPECTS) @Optional @Nullable String[] aspectNames, + @QueryParam(PARAM_ALWAYS_INCLUDE_KEY_ASPECT) @Optional @Nullable Boolean alwaysIncludeKeyAspect) throws URISyntaxException { log.debug("BATCH GET V2 {}", urnStrs.toString()); final Set urns = new HashSet<>(); @@ -138,7 +140,7 @@ public Task> batchGet( ? 
opContext.getEntityAspectNames(entityName) : new HashSet<>(Arrays.asList(aspectNames)); try { - return _entityService.getEntitiesV2(opContext, entityName, urns, projectedAspects); + return _entityService.getEntitiesV2(opContext, entityName, urns, projectedAspects, alwaysIncludeKeyAspect == null || alwaysIncludeKeyAspect); } catch (Exception e) { throw new RuntimeException( String.format( diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliConstants.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliConstants.java index ef79a404c2145..11df52ad66709 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliConstants.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliConstants.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.resources.restli; +import javax.annotation.Nullable; + public final class RestliConstants { private RestliConstants() {} @@ -21,6 +23,7 @@ private RestliConstants() {} public static final String PARAM_INPUT = "input"; public static final String PARAM_MAX_HOPS = "maxHops"; public static final String PARAM_ASPECTS = "aspects"; + public static final String PARAM_ALWAYS_INCLUDE_KEY_ASPECT = "alwaysIncludeKeyAspect"; public static final String PARAM_FILTER = "filter"; public static final String PARAM_GROUP = "group"; public static final String PARAM_SORT = "sort"; diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java index 185874fac1382..a2092405da3ff 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java @@ -8,6 +8,7 @@ import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; +import io.datahubproject.metadata.exception.ActorAccessException; import java.util.Optional; import java.util.function.Supplier; import javax.annotation.Nonnull; @@ -38,6 +39,8 @@ public static Task toTask(@Nonnull Supplier supplier) { if (throwable instanceof IllegalArgumentException || throwable.getCause() instanceof IllegalArgumentException) { finalException = badRequestException(throwable.getMessage()); + } else if (throwable.getCause() instanceof ActorAccessException) { + finalException = forbidden(throwable.getCause().getMessage()); } else if (throwable instanceof APIThrottleException) { finalException = apiThrottled(throwable.getMessage()); } else if (throwable instanceof RestLiServiceException) { @@ -109,4 +112,9 @@ public static RestLiServiceException invalidArgumentsException(@Nullable String public static RestLiServiceException apiThrottled(@Nullable String message) { return new RestLiServiceException(HttpStatus.S_429_TOO_MANY_REQUESTS, message); } + + @Nonnull + public static RestLiServiceException forbidden(@Nullable String message) { + return new RestLiServiceException(HttpStatus.S_403_FORBIDDEN, message); + } } diff --git a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java 
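With RestliUtils now translating ActorAccessException into a 403 alongside the OpenAPI exception handler earlier in this patch, client automation mainly needs a consistent way to report why a request was rejected. A rough sketch, assuming the OpenAPI handlers' {"error": ...} body and falling back to other envelopes:

import requests


def get_json_or_explain_403(session: requests.Session, url: str, **kwargs) -> dict:
    # GET a DataHub endpoint and surface a readable reason on HTTP 403.
    # The OpenAPI handlers above return {"error": "..."}; the Rest.li layer
    # uses its own error envelope, so fall back to "message" or the raw body.
    response = session.get(url, **kwargs)
    if response.status_code == 403:
        try:
            body = response.json()
            reason = body.get("error") or body.get("message") or response.text
        except ValueError:
            reason = response.text
        raise PermissionError(f"rejected by DataHub authorization: {reason}")
    response.raise_for_status()
    return response.json()
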
index a39401c170a11..037b5b81fd4df 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java +++ b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java @@ -100,7 +100,7 @@ public void testAsyncDefaultAspects() throws URISyntaxException { .recordTemplate(mcp.getAspect()) .auditStamp(new AuditStamp()) .metadataChangeProposal(mcp) - .build(opContext.getAspectRetrieverOpt().get()); + .build(opContext.getAspectRetriever()); when(aspectDao.runInTransactionWithRetry(any(), any(), anyInt())) .thenReturn( List.of(List.of( diff --git a/smoke-test/tests/tokens/revokable_access_token_test.py b/smoke-test/tests/tokens/revokable_access_token_test.py index af29437c051e1..006daae39333e 100644 --- a/smoke-test/tests/tokens/revokable_access_token_test.py +++ b/smoke-test/tests/tokens/revokable_access_token_test.py @@ -9,6 +9,8 @@ wait_for_writes_to_sync, ) +from .token_utils import listUsers, removeUser + pytestmark = pytest.mark.no_cypress_suite1 # Disable telemetry @@ -490,45 +492,3 @@ def getAccessTokenMetadata(session, token): response.raise_for_status() return response.json() - - -def removeUser(session, urn): - # Remove user - json = { - "query": """mutation removeUser($urn: String!) { - removeUser(urn: $urn) - }""", - "variables": {"urn": urn}, - } - - response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) - - response.raise_for_status() - return response.json() - - -def listUsers(session): - input = { - "start": "0", - "count": "20", - } - - # list users - json = { - "query": """query listUsers($input: ListUsersInput!) { - listUsers(input: $input) { - start - count - total - users { - username - } - } - }""", - "variables": {"input": input}, - } - - response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) - - response.raise_for_status() - return response.json() diff --git a/smoke-test/tests/tokens/session_access_token_test.py b/smoke-test/tests/tokens/session_access_token_test.py new file mode 100644 index 0000000000000..a16abc4445303 --- /dev/null +++ b/smoke-test/tests/tokens/session_access_token_test.py @@ -0,0 +1,173 @@ +import os +import time + +import pytest +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import AuditStampClass, CorpUserStatusClass +from requests.exceptions import HTTPError + +from tests.utils import ( + get_admin_credentials, + get_frontend_url, + login_as, + wait_for_writes_to_sync, +) + +from .token_utils import getUserId, listUsers, removeUser + +pytestmark = pytest.mark.no_cypress_suite1 + +# Disable telemetry +os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" + +(admin_user, admin_pass) = get_admin_credentials() +user_urn = "urn:li:corpuser:sessionUser" + + +@pytest.fixture(scope="class") +def custom_user_session(): + """Fixture to execute setup before and tear down after all tests are run""" + admin_session = login_as(admin_user, admin_pass) + + res_data = removeUser(admin_session, user_urn) + assert res_data + assert "error" not in res_data + + # Test getting the invite token + get_invite_token_json = { + "query": """query getInviteToken($input: GetInviteTokenInput!) 
{ + getInviteToken(input: $input){ + inviteToken + } + }""", + "variables": {"input": {}}, + } + + get_invite_token_response = admin_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=get_invite_token_json + ) + get_invite_token_response.raise_for_status() + get_invite_token_res_data = get_invite_token_response.json() + + assert get_invite_token_res_data + assert get_invite_token_res_data["data"] + invite_token = get_invite_token_res_data["data"]["getInviteToken"]["inviteToken"] + assert invite_token is not None + assert "error" not in invite_token + + # Pass the invite token when creating the user + sign_up_json = { + "fullName": "Test Session User", + "email": "sessionUser", + "password": "sessionUser", + "title": "Date Engineer", + "inviteToken": invite_token, + } + + sign_up_response = admin_session.post( + f"{get_frontend_url()}/signUp", json=sign_up_json + ) + sign_up_response.raise_for_status() + assert sign_up_response + assert "error" not in sign_up_response + # Sleep for eventual consistency + wait_for_writes_to_sync() + + # signUp will override the session cookie to the new user to be signed up. + admin_session.cookies.clear() + admin_session = login_as(admin_user, admin_pass) + + # Make user created user is there. + res_data = listUsers(admin_session) + assert res_data["data"] + assert res_data["data"]["listUsers"] + assert {"username": "sessionUser"} in res_data["data"]["listUsers"]["users"] + + yield login_as(sign_up_json["email"], sign_up_json["password"]) + + # Delete created user + res_data = removeUser(admin_session, user_urn) + assert res_data + assert res_data["data"] + assert res_data["data"]["removeUser"] is True + # Sleep for eventual consistency + wait_for_writes_to_sync() + + # Make user created user is not there. 
+ res_data = listUsers(admin_session) + assert res_data["data"] + assert res_data["data"]["listUsers"] + assert {"username": "sessionUser"} not in res_data["data"]["listUsers"]["users"] + + +@pytest.mark.dependency() +def test_soft_delete(graph_client, custom_user_session): + # assert initial access + assert getUserId(custom_user_session) == {"urn": user_urn} + + graph_client.soft_delete_entity(urn=user_urn) + wait_for_writes_to_sync() + + with pytest.raises(HTTPError) as req_info: + getUserId(custom_user_session) + assert "403 Client Error: Forbidden" in str(req_info.value) + + # undo soft delete + graph_client.set_soft_delete_status(urn=user_urn, delete=False) + wait_for_writes_to_sync() + + +@pytest.mark.dependency(depends=["test_soft_delete"]) +def test_suspend(graph_client, custom_user_session): + # assert initial access + assert getUserId(custom_user_session) == {"urn": user_urn} + + graph_client.emit( + MetadataChangeProposalWrapper( + entityType="corpuser", + entityUrn=user_urn, + changeType="UPSERT", + aspectName="corpUserStatus", + aspect=CorpUserStatusClass( + status="SUSPENDED", + lastModified=AuditStampClass( + time=int(time.time() * 1000.0), actor="urn:li:corpuser:unknown" + ), + ), + ) + ) + wait_for_writes_to_sync() + + with pytest.raises(HTTPError) as req_info: + getUserId(custom_user_session) + assert "403 Client Error: Forbidden" in str(req_info.value) + + # undo suspend + graph_client.emit( + MetadataChangeProposalWrapper( + entityType="corpuser", + entityUrn=user_urn, + changeType="UPSERT", + aspectName="corpUserStatus", + aspect=CorpUserStatusClass( + status="ACTIVE", + lastModified=AuditStampClass( + time=int(time.time() * 1000.0), actor="urn:li:corpuser:unknown" + ), + ), + ) + ) + wait_for_writes_to_sync() + + +@pytest.mark.dependency(depends=["test_suspend"]) +def test_hard_delete(graph_client, custom_user_session): + # assert initial access + assert getUserId(custom_user_session) == {"urn": user_urn} + + graph_client.hard_delete_entity(urn=user_urn) + wait_for_writes_to_sync() + + with pytest.raises(HTTPError) as req_info: + getUserId(custom_user_session) + assert "403 Client Error: Forbidden" in str(req_info.value) diff --git a/smoke-test/tests/tokens/token_utils.py b/smoke-test/tests/tokens/token_utils.py new file mode 100644 index 0000000000000..10558e7085de7 --- /dev/null +++ b/smoke-test/tests/tokens/token_utils.py @@ -0,0 +1,53 @@ +from tests.utils import get_frontend_url + + +def getUserId(session): + response = session.get( + f"{get_frontend_url()}/openapi/operations/identity/user/urn", + params={"skipCache": "true"}, + ) + + response.raise_for_status() + return response.json() + + +def removeUser(session, urn): + # Remove user + json = { + "query": """mutation removeUser($urn: String!) { + removeUser(urn: $urn) + }""", + "variables": {"urn": urn}, + } + + response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + + response.raise_for_status() + return response.json() + + +def listUsers(session): + input = { + "start": "0", + "count": "20", + } + + # list users + json = { + "query": """query listUsers($input: ListUsersInput!) 
{ + listUsers(input: $input) { + start + count + total + users { + username + } + } + }""", + "variables": {"input": input}, + } + + response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + + response.raise_for_status() + return response.json() From 83904b7f351c9ea8b9ac7737892b2b21caedb720 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Wed, 18 Dec 2024 17:02:16 -0500 Subject: [PATCH 3/8] fix(env) Fix forms hook env var default config (#12155) --- .../configuration/src/main/resources/application.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 75b4c8e8b002f..9010d77015f16 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -561,7 +561,7 @@ springdoc.api-docs.groups.enabled: true forms: hook: - enabled: { $FORMS_HOOK_ENABLED:true } + enabled: ${FORMS_HOOK_ENABLED:true} consumerGroupSuffix: ${FORMS_HOOK_CONSUMER_GROUP_SUFFIX:} businessAttribute: From da8f8221977444644596da40e676e15362bd7a2d Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 18 Dec 2024 14:36:10 -0800 Subject: [PATCH 4/8] feat(ingest/mlflow): Support configurable base_external_url (#12167) --- .../src/datahub/ingestion/source/mlflow.py | 35 ++++++++++++++++--- .../tests/unit/test_mlflow_source.py | 13 +++++++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mlflow.py b/metadata-ingestion/src/datahub/ingestion/source/mlflow.py index cef6d2b1bb577..26d160acf330c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mlflow.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mlflow.py @@ -38,16 +38,30 @@ class MLflowConfig(EnvConfigMixin): tracking_uri: Optional[str] = Field( default=None, - description="Tracking server URI. If not set, an MLflow default tracking_uri is used (local `mlruns/` directory or `MLFLOW_TRACKING_URI` environment variable)", + description=( + "Tracking server URI. If not set, an MLflow default tracking_uri is used" + " (local `mlruns/` directory or `MLFLOW_TRACKING_URI` environment variable)" + ), ) registry_uri: Optional[str] = Field( default=None, - description="Registry server URI. If not set, an MLflow default registry_uri is used (value of tracking_uri or `MLFLOW_REGISTRY_URI` environment variable)", + description=( + "Registry server URI. If not set, an MLflow default registry_uri is used" + " (value of tracking_uri or `MLFLOW_REGISTRY_URI` environment variable)" + ), ) model_name_separator: str = Field( default="_", description="A string which separates model name from its version (e.g. model_1 or model-1)", ) + base_external_url: Optional[str] = Field( + default=None, + description=( + "Base URL to use when constructing external URLs to MLflow." + " If not set, tracking_uri is used if it's an HTTP URL." + " If neither is set, external URLs are not generated." 
+ ), + ) @dataclass @@ -279,12 +293,23 @@ def _make_ml_model_urn(self, model_version: ModelVersion) -> str: ) return urn - def _make_external_url(self, model_version: ModelVersion) -> Union[None, str]: + def _get_base_external_url_from_tracking_uri(self) -> Optional[str]: + if isinstance( + self.client.tracking_uri, str + ) and self.client.tracking_uri.startswith("http"): + return self.client.tracking_uri + else: + return None + + def _make_external_url(self, model_version: ModelVersion) -> Optional[str]: """ Generate URL for a Model Version to MLflow UI. """ - base_uri = self.client.tracking_uri - if base_uri.startswith("http"): + base_uri = ( + self.config.base_external_url + or self._get_base_external_url_from_tracking_uri() + ) + if base_uri: return f"{base_uri.rstrip('/')}/#/models/{model_version.name}/versions/{model_version.version}" else: return None diff --git a/metadata-ingestion/tests/unit/test_mlflow_source.py b/metadata-ingestion/tests/unit/test_mlflow_source.py index d213dd92352e6..e882296b6f331 100644 --- a/metadata-ingestion/tests/unit/test_mlflow_source.py +++ b/metadata-ingestion/tests/unit/test_mlflow_source.py @@ -136,3 +136,16 @@ def test_make_external_link_remote(source, model_version): url = source._make_external_url(model_version) assert url == expected_url + + +def test_make_external_link_remote_via_config(source, model_version): + custom_base_url = "https://custom-server.org" + source.config.base_external_url = custom_base_url + source.client = MlflowClient( + tracking_uri="https://dummy-mlflow-tracking-server.org" + ) + expected_url = f"{custom_base_url}/#/models/{model_version.name}/versions/{model_version.version}" + + url = source._make_external_url(model_version) + + assert url == expected_url From 4392d72456faae5f0f59eb09756287182feec56b Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 18 Dec 2024 20:29:34 -0500 Subject: [PATCH 5/8] fix(cli/properties): fix data type validation (#12170) --- .../structuredproperties.py | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py index e37281dea86e1..619f69b016262 100644 --- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py +++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py @@ -14,7 +14,7 @@ PropertyValueClass, StructuredPropertyDefinitionClass, ) -from datahub.metadata.urns import StructuredPropertyUrn, Urn +from datahub.metadata.urns import DataTypeUrn, StructuredPropertyUrn, Urn from datahub.utilities.urns._urn_base import URN_TYPES logging.basicConfig(level=logging.INFO) @@ -86,19 +86,31 @@ class StructuredProperties(ConfigModel): @validator("type") def validate_type(cls, v: str) -> str: - # Convert to lowercase if needed - if not v.islower(): + # This logic is somewhat hacky, since we need to deal with + # 1. fully qualified urns + # 2. raw data types, that need to get the datahub namespace prefix + # While keeping the user-facing interface and error messages clean. + + if not v.startswith("urn:li:") and not v.islower(): + # Convert to lowercase if needed + v = v.lower() logger.warning( - f"Structured property type should be lowercase. Updated to {v.lower()}" + f"Structured property type should be lowercase. 
Updated to {v}" ) - v = v.lower() + + urn = Urn.make_data_type_urn(v) # Check if type is allowed - if not AllowedTypes.check_allowed_type(v): + data_type_urn = DataTypeUrn.from_string(urn) + unqualified_data_type = data_type_urn.id + if unqualified_data_type.startswith("datahub."): + unqualified_data_type = unqualified_data_type[len("datahub.") :] + if not AllowedTypes.check_allowed_type(unqualified_data_type): raise ValueError( - f"Type {v} is not allowed. Allowed types are {AllowedTypes.values()}" + f"Type {unqualified_data_type} is not allowed. Allowed types are {AllowedTypes.values()}" ) - return v + + return urn @property def fqn(self) -> str: From 48f3cc578589c5c0379d5117756f01a0228669b4 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Wed, 18 Dec 2024 21:53:20 -0600 Subject: [PATCH 6/8] fix(pgsql): Postgres doesn't support UNION select with FOR UPDATE (#12169) --- .../metadata/entity/ebean/EbeanAspectDao.java | 87 ++++++++++++++++++- .../metadata/config/EbeanConfiguration.java | 1 + .../src/main/resources/application.yaml | 1 + 3 files changed, 85 insertions(+), 4 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index bd6cc67561b88..ea580a97c5188 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -93,8 +93,14 @@ public class EbeanAspectDao implements AspectDao, AspectMigrationsDao { */ private final LoadingCache locks; + private final String batchGetMethod; + public EbeanAspectDao(@Nonnull final Database server, EbeanConfiguration ebeanConfiguration) { _server = server; + this.batchGetMethod = + ebeanConfiguration.getBatchGetMethod() != null + ? ebeanConfiguration.getBatchGetMethod() + : "IN"; if (ebeanConfiguration.getLocking().isEnabled()) { this.locks = CacheBuilder.newBuilder() @@ -371,23 +377,37 @@ private List batchGet( final int totalPageCount = QueryUtils.getTotalPageCount(keys.size(), keysCount); final List finalResult = - batchGetUnion(new ArrayList<>(keys), keysCount, position, forUpdate); + batchGetSelectString(new ArrayList<>(keys), keysCount, position, forUpdate); while (QueryUtils.hasMore(position, keysCount, totalPageCount)) { position += keysCount; final List oneStatementResult = - batchGetUnion(new ArrayList<>(keys), keysCount, position, forUpdate); + batchGetSelectString(new ArrayList<>(keys), keysCount, position, forUpdate); finalResult.addAll(oneStatementResult); } return finalResult; } + @Nonnull + private List batchGetSelectString( + @Nonnull final List keys, + final int keysCount, + final int position, + boolean forUpdate) { + + if (batchGetMethod.equals("IN")) { + return batchGetIn(keys, keysCount, position, forUpdate); + } + + return batchGetUnion(keys, keysCount, position, forUpdate); + } + /** * Builds a single SELECT statement for batch get, which selects one entity, and then can be * UNION'd with other SELECT statements. 
*/ - private String batchGetSelect( + private String batchGetSelectString( final int selectId, @Nonnull final String urn, @Nonnull final String aspect, @@ -434,7 +454,7 @@ private List batchGetUnion( final Map params = new HashMap<>(); for (int index = position; index < end; index++) { sb.append( - batchGetSelect( + batchGetSelectString( index - position, keys.get(index).getUrn(), keys.get(index).getAspect(), @@ -467,6 +487,65 @@ private List batchGetUnion( return query.findList(); } + @Nonnull + private List batchGetIn( + @Nonnull final List keys, + final int keysCount, + final int position, + boolean forUpdate) { + validateConnection(); + + // Build a single SELECT with IN clause using composite key comparison + // Query will look like: + // SELECT * FROM metadata_aspect WHERE (urn, aspect, version) IN + // (('urn0', 'aspect0', 0), ('urn1', 'aspect1', 1)) + final StringBuilder sb = new StringBuilder(); + sb.append( + "SELECT urn, aspect, version, metadata, systemMetadata, createdOn, createdBy, createdFor "); + sb.append("FROM metadata_aspect_v2 WHERE (urn, aspect, version) IN ("); + + final int end = Math.min(keys.size(), position + keysCount); + final Map params = new HashMap<>(); + + for (int index = position; index < end; index++) { + int paramIndex = index - position; + String urnParam = "urn" + paramIndex; + String aspectParam = "aspect" + paramIndex; + String versionParam = "version" + paramIndex; + + params.put(urnParam, keys.get(index).getUrn()); + params.put(aspectParam, keys.get(index).getAspect()); + params.put(versionParam, keys.get(index).getVersion()); + + sb.append("(:" + urnParam + ", :" + aspectParam + ", :" + versionParam + ")"); + + if (index != end - 1) { + sb.append(","); + } + } + + sb.append(")"); + + if (forUpdate) { + sb.append(" FOR UPDATE"); + } + + final RawSql rawSql = + RawSqlBuilder.parse(sb.toString()) + .columnMapping(EbeanAspectV2.URN_COLUMN, "key.urn") + .columnMapping(EbeanAspectV2.ASPECT_COLUMN, "key.aspect") + .columnMapping(EbeanAspectV2.VERSION_COLUMN, "key.version") + .create(); + + final Query query = _server.find(EbeanAspectV2.class).setRawSql(rawSql); + + for (Map.Entry param : params.entrySet()) { + query.setParameter(param.getKey(), param.getValue()); + } + + return query.findList(); + } + @Override @Nonnull public ListResult listUrns( diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EbeanConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EbeanConfiguration.java index 47b406e695a3f..6eb31e14a2d3b 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EbeanConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EbeanConfiguration.java @@ -23,6 +23,7 @@ public class EbeanConfiguration { private boolean autoCreateDdl; private boolean postgresUseIamAuth; private LockingConfiguration locking; + private String batchGetMethod; public static final EbeanConfiguration testDefault = EbeanConfiguration.builder().locking(LockingConfiguration.testDefault).build(); diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 9010d77015f16..b997bc108e4ba 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -164,6 +164,7 @@ ebean: waitTimeoutMillis: ${EBEAN_WAIT_TIMEOUT_MILLIS:1000} autoCreateDdl: 
${EBEAN_AUTOCREATE:false} postgresUseIamAuth: ${EBEAN_POSTGRES_USE_AWS_IAM_AUTH:false} + batchGetMethod: ${EBEAN_BATCH_GET_METHOD:IN} # Alternative UNION locking: enabled: ${EBEAN_LOCKING_ENABLED:false} durationSeconds: ${EBEAN_LOCKING_DURATION_SECONDS:60} From 953893cf2e72e71580b21bdfc12592fca572e13b Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 19 Dec 2024 12:39:47 +0530 Subject: [PATCH 7/8] refactor(ingest/kafka-connect): define interface for new connector impl (#12149) --- metadata-ingestion/setup.py | 2 +- .../ingestion/source/kafka/kafka_connect.py | 1468 ----------------- .../source/kafka_connect/__init__.py | 0 .../ingestion/source/kafka_connect/common.py | 202 +++ .../source/kafka_connect/kafka_connect.py | 367 +++++ .../source/kafka_connect/sink_connectors.py | 341 ++++ .../source/kafka_connect/source_connectors.py | 570 +++++++ 7 files changed, 1481 insertions(+), 1469 deletions(-) delete mode 100644 metadata-ingestion/src/datahub/ingestion/source/kafka/kafka_connect.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/kafka_connect/__init__.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/kafka_connect/common.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 6334b3abbb8a0..c6994dd6d5aa6 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -741,7 +741,7 @@ "hive-metastore = datahub.ingestion.source.sql.hive_metastore:HiveMetastoreSource", "json-schema = datahub.ingestion.source.schema.json_schema:JsonSchemaSource", "kafka = datahub.ingestion.source.kafka.kafka:KafkaSource", - "kafka-connect = datahub.ingestion.source.kafka.kafka_connect:KafkaConnectSource", + "kafka-connect = datahub.ingestion.source.kafka_connect.kafka_connect:KafkaConnectSource", "ldap = datahub.ingestion.source.ldap:LDAPSource", "looker = datahub.ingestion.source.looker.looker_source:LookerDashboardSource", "lookml = datahub.ingestion.source.looker.lookml_source:LookMLSource", diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka_connect.py deleted file mode 100644 index 23a99ccb310e1..0000000000000 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka_connect.py +++ /dev/null @@ -1,1468 +0,0 @@ -import logging -import re -from dataclasses import dataclass, field -from typing import Dict, Iterable, List, Optional, Tuple - -import jpype -import jpype.imports -import requests -from pydantic.fields import Field -from sqlalchemy.engine.url import make_url - -import datahub.emitter.mce_builder as builder -import datahub.metadata.schema_classes as models -from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.source_common import ( - DatasetLineageProviderConfigBase, - PlatformInstanceConfigMixin, -) -from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.api.common import PipelineContext -from datahub.ingestion.api.decorators import ( - SourceCapability, - SupportStatus, - capability, - config_class, - platform_name, - support_status, -) -from datahub.ingestion.api.source import 
MetadataWorkUnitProcessor, Source -from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( - get_platform_from_sqlalchemy_uri, -) -from datahub.ingestion.source.state.stale_entity_removal_handler import ( - StaleEntityRemovalHandler, - StaleEntityRemovalSourceReport, - StatefulStaleMetadataRemovalConfig, -) -from datahub.ingestion.source.state.stateful_ingestion_base import ( - StatefulIngestionConfigBase, - StatefulIngestionSourceBase, -) - -logger = logging.getLogger(__name__) - -KAFKA = "kafka" -SOURCE = "source" -SINK = "sink" -CONNECTOR_CLASS = "connector.class" - - -class ProvidedConfig(ConfigModel): - provider: str - path_key: str - value: str - - -class GenericConnectorConfig(ConfigModel): - connector_name: str - source_dataset: str - source_platform: str - - -class KafkaConnectSourceConfig( - PlatformInstanceConfigMixin, - DatasetLineageProviderConfigBase, - StatefulIngestionConfigBase, -): - # See the Connect REST Interface for details - # https://docs.confluent.io/platform/current/connect/references/restapi.html# - connect_uri: str = Field( - default="http://localhost:8083/", description="URI to connect to." - ) - username: Optional[str] = Field(default=None, description="Kafka Connect username.") - password: Optional[str] = Field(default=None, description="Kafka Connect password.") - cluster_name: Optional[str] = Field( - default="connect-cluster", description="Cluster to ingest from." - ) - # convert lineage dataset's urns to lowercase - convert_lineage_urns_to_lowercase: bool = Field( - default=False, - description="Whether to convert the urns of ingested lineage dataset to lowercase", - ) - connector_patterns: AllowDenyPattern = Field( - default=AllowDenyPattern.allow_all(), - description="regex patterns for connectors to filter for ingestion.", - ) - provided_configs: Optional[List[ProvidedConfig]] = Field( - default=None, description="Provided Configurations" - ) - connect_to_platform_map: Optional[Dict[str, Dict[str, str]]] = Field( - default=None, - description='Platform instance mapping when multiple instances for a platform is available. Entry for a platform should be in either `platform_instance_map` or `connect_to_platform_map`. e.g.`connect_to_platform_map: { "postgres-connector-finance-db": "postgres": "core_finance_instance" }`', - ) - platform_instance_map: Optional[Dict[str, str]] = Field( - default=None, - description='Platform instance mapping to use when constructing URNs. 
e.g.`platform_instance_map: { "hive": "warehouse" }`', - ) - generic_connectors: List[GenericConnectorConfig] = Field( - default=[], - description="Provide lineage graph for sources connectors other than Confluent JDBC Source Connector, Debezium Source Connector, and Mongo Source Connector", - ) - - stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None - - -@dataclass -class KafkaConnectSourceReport(StaleEntityRemovalSourceReport): - connectors_scanned: int = 0 - filtered: List[str] = field(default_factory=list) - - def report_connector_scanned(self, connector: str) -> None: - self.connectors_scanned += 1 - - def report_dropped(self, connector: str) -> None: - self.filtered.append(connector) - - -@dataclass -class KafkaConnectLineage: - """Class to store Kafka Connect lineage mapping, Each instance is potential DataJob""" - - source_platform: str - target_dataset: str - target_platform: str - job_property_bag: Optional[Dict[str, str]] = None - source_dataset: Optional[str] = None - - -@dataclass -class ConnectorManifest: - """Each instance is potential DataFlow""" - - name: str - type: str - config: Dict - tasks: Dict - url: Optional[str] = None - flow_property_bag: Optional[Dict[str, str]] = None - lineages: List[KafkaConnectLineage] = field(default_factory=list) - topic_names: Iterable[str] = field(default_factory=list) - - -def remove_prefix(text: str, prefix: str) -> str: - if text.startswith(prefix): - index = len(prefix) - return text[index:] - return text - - -def unquote( - string: str, leading_quote: str = '"', trailing_quote: Optional[str] = None -) -> str: - """ - If string starts and ends with a quote, unquote it - """ - trailing_quote = trailing_quote if trailing_quote else leading_quote - if string.startswith(leading_quote) and string.endswith(trailing_quote): - string = string[1:-1] - return string - - -def get_dataset_name( - database_name: Optional[str], - source_table: str, -) -> str: - if database_name: - dataset_name = database_name + "." + source_table - else: - dataset_name = source_table - - return dataset_name - - -def get_platform_instance( - config: KafkaConnectSourceConfig, connector_name: str, platform: str -) -> Optional[str]: - instance_name = None - if ( - config.connect_to_platform_map - and config.connect_to_platform_map.get(connector_name) - and config.connect_to_platform_map[connector_name].get(platform) - ): - instance_name = config.connect_to_platform_map[connector_name][platform] - if config.platform_instance_map and config.platform_instance_map.get(platform): - logger.warning( - f"Same source platform {platform} configured in both platform_instance_map and connect_to_platform_map." - "Will prefer connector specific platform instance from connect_to_platform_map." 
- ) - elif config.platform_instance_map and config.platform_instance_map.get(platform): - instance_name = config.platform_instance_map[platform] - logger.info( - f"Instance name assigned is: {instance_name} for Connector Name {connector_name} and platform {platform}" - ) - return instance_name - - -@dataclass -class ConfluentJDBCSourceConnector: - connector_manifest: ConnectorManifest - report: KafkaConnectSourceReport - - def __init__( - self, - connector_manifest: ConnectorManifest, - config: KafkaConnectSourceConfig, - report: KafkaConnectSourceReport, - ) -> None: - self.connector_manifest = connector_manifest - self.config = config - self.report = report - self._extract_lineages() - - REGEXROUTER = "org.apache.kafka.connect.transforms.RegexRouter" - KNOWN_TOPICROUTING_TRANSFORMS = [REGEXROUTER] - # https://kafka.apache.org/documentation/#connect_included_transformation - KAFKA_NONTOPICROUTING_TRANSFORMS = [ - "InsertField", - "InsertField$Key", - "InsertField$Value", - "ReplaceField", - "ReplaceField$Key", - "ReplaceField$Value", - "MaskField", - "MaskField$Key", - "MaskField$Value", - "ValueToKey", - "ValueToKey$Key", - "ValueToKey$Value", - "HoistField", - "HoistField$Key", - "HoistField$Value", - "ExtractField", - "ExtractField$Key", - "ExtractField$Value", - "SetSchemaMetadata", - "SetSchemaMetadata$Key", - "SetSchemaMetadata$Value", - "Flatten", - "Flatten$Key", - "Flatten$Value", - "Cast", - "Cast$Key", - "Cast$Value", - "HeadersFrom", - "HeadersFrom$Key", - "HeadersFrom$Value", - "TimestampConverter", - "Filter", - "InsertHeader", - "DropHeaders", - ] - # https://docs.confluent.io/platform/current/connect/transforms/overview.html - CONFLUENT_NONTOPICROUTING_TRANSFORMS = [ - "Drop", - "Drop$Key", - "Drop$Value", - "Filter", - "Filter$Key", - "Filter$Value", - "TombstoneHandler", - ] - KNOWN_NONTOPICROUTING_TRANSFORMS = ( - KAFKA_NONTOPICROUTING_TRANSFORMS - + [ - f"org.apache.kafka.connect.transforms.{t}" - for t in KAFKA_NONTOPICROUTING_TRANSFORMS - ] - + CONFLUENT_NONTOPICROUTING_TRANSFORMS - + [ - f"io.confluent.connect.transforms.{t}" - for t in CONFLUENT_NONTOPICROUTING_TRANSFORMS - ] - ) - - @dataclass - class JdbcParser: - db_connection_url: str - source_platform: str - database_name: str - topic_prefix: str - query: str - transforms: list - - def get_parser( - self, - connector_manifest: ConnectorManifest, - ) -> JdbcParser: - url = remove_prefix( - str(connector_manifest.config.get("connection.url")), "jdbc:" - ) - url_instance = make_url(url) - source_platform = get_platform_from_sqlalchemy_uri(str(url_instance)) - database_name = url_instance.database - assert database_name - db_connection_url = f"{url_instance.drivername}://{url_instance.host}:{url_instance.port}/{database_name}" - - topic_prefix = self.connector_manifest.config.get("topic.prefix", None) - - query = self.connector_manifest.config.get("query", None) - - transform_names = ( - self.connector_manifest.config.get("transforms", "").split(",") - if self.connector_manifest.config.get("transforms") - else [] - ) - - transforms = [] - for name in transform_names: - transform = {"name": name} - transforms.append(transform) - for key in self.connector_manifest.config.keys(): - if key.startswith(f"transforms.{name}."): - transform[ - key.replace(f"transforms.{name}.", "") - ] = self.connector_manifest.config[key] - - return self.JdbcParser( - db_connection_url, - source_platform, - database_name, - topic_prefix, - query, - transforms, - ) - - def default_get_lineages( - self, - topic_prefix: str, - database_name: 
str, - source_platform: str, - topic_names: Optional[Iterable[str]] = None, - include_source_dataset: bool = True, - ) -> List[KafkaConnectLineage]: - lineages: List[KafkaConnectLineage] = [] - if not topic_names: - topic_names = self.connector_manifest.topic_names - table_name_tuples: List[Tuple] = self.get_table_names() - for topic in topic_names: - # All good for NO_TRANSFORM or (SINGLE_TRANSFORM and KNOWN_NONTOPICROUTING_TRANSFORM) or (not SINGLE_TRANSFORM and all(KNOWN_NONTOPICROUTING_TRANSFORM)) - source_table: str = ( - remove_prefix(topic, topic_prefix) if topic_prefix else topic - ) - # include schema name for three-level hierarchies - if has_three_level_hierarchy(source_platform): - table_name_tuple: Tuple = next( - iter([t for t in table_name_tuples if t and t[-1] == source_table]), - (), - ) - if len(table_name_tuple) > 1: - source_table = f"{table_name_tuple[-2]}.{source_table}" - else: - include_source_dataset = False - self.report.warning( - "Could not find schema for table" - f"{self.connector_manifest.name} : {source_table}", - ) - dataset_name: str = get_dataset_name(database_name, source_table) - lineage = KafkaConnectLineage( - source_dataset=dataset_name if include_source_dataset else None, - source_platform=source_platform, - target_dataset=topic, - target_platform=KAFKA, - ) - lineages.append(lineage) - return lineages - - def get_table_names(self) -> List[Tuple]: - sep: str = "." - leading_quote_char: str = '"' - trailing_quote_char: str = leading_quote_char - - table_ids: List[str] = [] - if self.connector_manifest.tasks: - table_ids = ( - ",".join( - [ - task["config"].get("tables") - for task in self.connector_manifest.tasks - ] - ) - ).split(",") - quote_method = self.connector_manifest.config.get( - "quote.sql.identifiers", "always" - ) - if ( - quote_method == "always" - and table_ids - and table_ids[0] - and table_ids[-1] - ): - leading_quote_char = table_ids[0][0] - trailing_quote_char = table_ids[-1][-1] - # This will only work for single character quotes - elif self.connector_manifest.config.get("table.whitelist"): - table_ids = self.connector_manifest.config.get("table.whitelist").split(",") # type: ignore - - # List of Tuple containing (schema, table) - tables: List[Tuple] = [ - ( - ( - unquote( - table_id.split(sep)[-2], leading_quote_char, trailing_quote_char - ) - if len(table_id.split(sep)) > 1 - else "" - ), - unquote( - table_id.split(sep)[-1], leading_quote_char, trailing_quote_char - ), - ) - for table_id in table_ids - ] - return tables - - def _extract_lineages(self): - lineages: List[KafkaConnectLineage] = list() - parser = self.get_parser(self.connector_manifest) - source_platform = parser.source_platform - database_name = parser.database_name - query = parser.query - topic_prefix = parser.topic_prefix - transforms = parser.transforms - self.connector_manifest.flow_property_bag = self.connector_manifest.config - - # Mask/Remove properties that may reveal credentials - self.connector_manifest.flow_property_bag[ - "connection.url" - ] = parser.db_connection_url - if "connection.password" in self.connector_manifest.flow_property_bag: - del self.connector_manifest.flow_property_bag["connection.password"] - if "connection.user" in self.connector_manifest.flow_property_bag: - del self.connector_manifest.flow_property_bag["connection.user"] - - logging.debug( - f"Extracting source platform: {source_platform} and database name: {database_name} from connection url " - ) - - if not self.connector_manifest.topic_names: - self.connector_manifest.lineages 
= lineages - return - - if query: - # Lineage source_table can be extracted by parsing query - for topic in self.connector_manifest.topic_names: - # default method - as per earlier implementation - dataset_name: str = get_dataset_name(database_name, topic) - - lineage = KafkaConnectLineage( - source_dataset=None, - source_platform=source_platform, - target_dataset=topic, - target_platform=KAFKA, - ) - lineages.append(lineage) - self.report.warning( - "Could not find input dataset, the connector has query configuration set", - self.connector_manifest.name, - ) - self.connector_manifest.lineages = lineages - return - - SINGLE_TRANSFORM = len(transforms) == 1 - NO_TRANSFORM = len(transforms) == 0 - UNKNOWN_TRANSFORM = any( - [ - transform["type"] - not in self.KNOWN_TOPICROUTING_TRANSFORMS - + self.KNOWN_NONTOPICROUTING_TRANSFORMS - for transform in transforms - ] - ) - ALL_TRANSFORMS_NON_TOPICROUTING = all( - [ - transform["type"] in self.KNOWN_NONTOPICROUTING_TRANSFORMS - for transform in transforms - ] - ) - - if NO_TRANSFORM or ALL_TRANSFORMS_NON_TOPICROUTING: - self.connector_manifest.lineages = self.default_get_lineages( - database_name=database_name, - source_platform=source_platform, - topic_prefix=topic_prefix, - ) - return - - if SINGLE_TRANSFORM and transforms[0]["type"] == self.REGEXROUTER: - tables = self.get_table_names() - topic_names = list(self.connector_manifest.topic_names) - - from java.util.regex import Pattern - - for table in tables: - source_table: str = table[-1] - topic = topic_prefix + source_table if topic_prefix else source_table - - transform_regex = Pattern.compile(transforms[0]["regex"]) - transform_replacement = transforms[0]["replacement"] - - matcher = transform_regex.matcher(topic) - if matcher.matches(): - topic = str(matcher.replaceFirst(transform_replacement)) - - # Additional check to confirm that the topic present - # in connector topics - - if topic in self.connector_manifest.topic_names: - # include schema name for three-level hierarchies - if has_three_level_hierarchy(source_platform) and len(table) > 1: - source_table = f"{table[-2]}.{table[-1]}" - - dataset_name = get_dataset_name(database_name, source_table) - - lineage = KafkaConnectLineage( - source_dataset=dataset_name, - source_platform=source_platform, - target_dataset=topic, - target_platform=KAFKA, - ) - topic_names.remove(topic) - lineages.append(lineage) - - if topic_names: - lineages.extend( - self.default_get_lineages( - database_name=database_name, - source_platform=source_platform, - topic_prefix=topic_prefix, - topic_names=topic_names, - include_source_dataset=False, - ) - ) - self.report.warning( - "Could not find input dataset for connector topics", - f"{self.connector_manifest.name} : {topic_names}", - ) - self.connector_manifest.lineages = lineages - return - else: - include_source_dataset = True - if SINGLE_TRANSFORM and UNKNOWN_TRANSFORM: - self.report.warning( - "Could not find input dataset, connector has unknown transform", - f"{self.connector_manifest.name} : {transforms[0]['type']}", - ) - include_source_dataset = False - if not SINGLE_TRANSFORM and UNKNOWN_TRANSFORM: - self.report.warning( - "Could not find input dataset, connector has one or more unknown transforms", - self.connector_manifest.name, - ) - include_source_dataset = False - lineages = self.default_get_lineages( - database_name=database_name, - source_platform=source_platform, - topic_prefix=topic_prefix, - include_source_dataset=include_source_dataset, - ) - self.connector_manifest.lineages = lineages - 
return - - -@dataclass -class MongoSourceConnector: - # https://www.mongodb.com/docs/kafka-connector/current/source-connector/ - - connector_manifest: ConnectorManifest - - def __init__( - self, connector_manifest: ConnectorManifest, config: KafkaConnectSourceConfig - ) -> None: - self.connector_manifest = connector_manifest - self.config = config - self._extract_lineages() - - @dataclass - class MongoSourceParser: - db_connection_url: Optional[str] - source_platform: str - database_name: Optional[str] - topic_prefix: Optional[str] - transforms: List[str] - - def get_parser( - self, - connector_manifest: ConnectorManifest, - ) -> MongoSourceParser: - parser = self.MongoSourceParser( - db_connection_url=connector_manifest.config.get("connection.uri"), - source_platform="mongodb", - database_name=connector_manifest.config.get("database"), - topic_prefix=connector_manifest.config.get("topic_prefix"), - transforms=( - connector_manifest.config["transforms"].split(",") - if "transforms" in connector_manifest.config - else [] - ), - ) - - return parser - - def _extract_lineages(self): - lineages: List[KafkaConnectLineage] = list() - parser = self.get_parser(self.connector_manifest) - source_platform = parser.source_platform - topic_naming_pattern = r"mongodb\.(\w+)\.(\w+)" - - if not self.connector_manifest.topic_names: - return lineages - - for topic in self.connector_manifest.topic_names: - found = re.search(re.compile(topic_naming_pattern), topic) - - if found: - table_name = get_dataset_name(found.group(1), found.group(2)) - - lineage = KafkaConnectLineage( - source_dataset=table_name, - source_platform=source_platform, - target_dataset=topic, - target_platform=KAFKA, - ) - lineages.append(lineage) - self.connector_manifest.lineages = lineages - - -@dataclass -class DebeziumSourceConnector: - connector_manifest: ConnectorManifest - report: KafkaConnectSourceReport - - def __init__( - self, - connector_manifest: ConnectorManifest, - config: KafkaConnectSourceConfig, - report: KafkaConnectSourceReport, - ) -> None: - self.connector_manifest = connector_manifest - self.config = config - self.report = report - self._extract_lineages() - - @dataclass - class DebeziumParser: - source_platform: str - server_name: Optional[str] - database_name: Optional[str] - - def get_server_name(self, connector_manifest: ConnectorManifest) -> str: - if "topic.prefix" in connector_manifest.config: - return connector_manifest.config["topic.prefix"] - else: - return connector_manifest.config.get("database.server.name", "") - - def get_parser( - self, - connector_manifest: ConnectorManifest, - ) -> DebeziumParser: - connector_class = connector_manifest.config.get(CONNECTOR_CLASS, "") - - if connector_class == "io.debezium.connector.mysql.MySqlConnector": - parser = self.DebeziumParser( - source_platform="mysql", - server_name=self.get_server_name(connector_manifest), - database_name=None, - ) - elif connector_class == "MySqlConnector": - parser = self.DebeziumParser( - source_platform="mysql", - server_name=self.get_server_name(connector_manifest), - database_name=None, - ) - elif connector_class == "io.debezium.connector.mongodb.MongoDbConnector": - parser = self.DebeziumParser( - source_platform="mongodb", - server_name=self.get_server_name(connector_manifest), - database_name=None, - ) - elif connector_class == "io.debezium.connector.postgresql.PostgresConnector": - parser = self.DebeziumParser( - source_platform="postgres", - server_name=self.get_server_name(connector_manifest), - 
database_name=connector_manifest.config.get("database.dbname"), - ) - elif connector_class == "io.debezium.connector.oracle.OracleConnector": - parser = self.DebeziumParser( - source_platform="oracle", - server_name=self.get_server_name(connector_manifest), - database_name=connector_manifest.config.get("database.dbname"), - ) - elif connector_class == "io.debezium.connector.sqlserver.SqlServerConnector": - database_name = connector_manifest.config.get( - "database.names" - ) or connector_manifest.config.get("database.dbname") - - if "," in str(database_name): - raise Exception( - f"Only one database is supported for Debezium's SQL Server connector. Found: {database_name}" - ) - - parser = self.DebeziumParser( - source_platform="mssql", - server_name=self.get_server_name(connector_manifest), - database_name=database_name, - ) - elif connector_class == "io.debezium.connector.db2.Db2Connector": - parser = self.DebeziumParser( - source_platform="db2", - server_name=self.get_server_name(connector_manifest), - database_name=connector_manifest.config.get("database.dbname"), - ) - elif connector_class == "io.debezium.connector.vitess.VitessConnector": - parser = self.DebeziumParser( - source_platform="vitess", - server_name=self.get_server_name(connector_manifest), - database_name=connector_manifest.config.get("vitess.keyspace"), - ) - else: - raise ValueError(f"Connector class '{connector_class}' is unknown.") - - return parser - - def _extract_lineages(self): - lineages: List[KafkaConnectLineage] = list() - - try: - parser = self.get_parser(self.connector_manifest) - source_platform = parser.source_platform - server_name = parser.server_name - database_name = parser.database_name - topic_naming_pattern = rf"({server_name})\.(\w+\.\w+)" - - if not self.connector_manifest.topic_names: - return lineages - - for topic in self.connector_manifest.topic_names: - found = re.search(re.compile(topic_naming_pattern), topic) - - if found: - table_name = get_dataset_name(database_name, found.group(2)) - - lineage = KafkaConnectLineage( - source_dataset=table_name, - source_platform=source_platform, - target_dataset=topic, - target_platform=KAFKA, - ) - lineages.append(lineage) - self.connector_manifest.lineages = lineages - except Exception as e: - self.report.warning( - "Error resolving lineage for connector", - self.connector_manifest.name, - exc=e, - ) - - return - - -@dataclass -class BigQuerySinkConnector: - connector_manifest: ConnectorManifest - report: KafkaConnectSourceReport - - def __init__( - self, connector_manifest: ConnectorManifest, report: KafkaConnectSourceReport - ) -> None: - self.connector_manifest = connector_manifest - self.report = report - self._extract_lineages() - - @dataclass - class BQParser: - project: str - target_platform: str - sanitizeTopics: str - transforms: list - topicsToTables: Optional[str] = None - datasets: Optional[str] = None - defaultDataset: Optional[str] = None - version: str = "v1" - - def get_parser( - self, - connector_manifest: ConnectorManifest, - ) -> BQParser: - project = connector_manifest.config["project"] - sanitizeTopics = connector_manifest.config.get("sanitizeTopics", "false") - transform_names = ( - self.connector_manifest.config.get("transforms", "").split(",") - if self.connector_manifest.config.get("transforms") - else [] - ) - transforms = [] - for name in transform_names: - transform = {"name": name} - transforms.append(transform) - for key in self.connector_manifest.config.keys(): - if key.startswith(f"transforms.{name}."): - transform[ - 
key.replace(f"transforms.{name}.", "") - ] = self.connector_manifest.config[key] - - if "defaultDataset" in connector_manifest.config: - defaultDataset = connector_manifest.config["defaultDataset"] - return self.BQParser( - project=project, - defaultDataset=defaultDataset, - target_platform="bigquery", - sanitizeTopics=sanitizeTopics.lower() == "true", - version="v2", - transforms=transforms, - ) - else: - # version 1.6.x and similar configs supported - datasets = connector_manifest.config["datasets"] - topicsToTables = connector_manifest.config.get("topicsToTables") - - return self.BQParser( - project=project, - topicsToTables=topicsToTables, - datasets=datasets, - target_platform="bigquery", - sanitizeTopics=sanitizeTopics.lower() == "true", - transforms=transforms, - ) - - def get_list(self, property: str) -> Iterable[Tuple[str, str]]: - entries = property.split(",") - for entry in entries: - key, val = entry.rsplit("=") - yield (key.strip(), val.strip()) - - def get_dataset_for_topic_v1(self, topic: str, parser: BQParser) -> Optional[str]: - topicregex_dataset_map: Dict[str, str] = dict(self.get_list(parser.datasets)) # type: ignore - from java.util.regex import Pattern - - for pattern, dataset in topicregex_dataset_map.items(): - patternMatcher = Pattern.compile(pattern).matcher(topic) - if patternMatcher.matches(): - return dataset - return None - - def sanitize_table_name(self, table_name): - table_name = re.sub("[^a-zA-Z0-9_]", "_", table_name) - if re.match("^[^a-zA-Z_].*", table_name): - table_name = "_" + table_name - - return table_name - - def get_dataset_table_for_topic( - self, topic: str, parser: BQParser - ) -> Optional[str]: - if parser.version == "v2": - dataset = parser.defaultDataset - parts = topic.split(":") - if len(parts) == 2: - dataset = parts[0] - table = parts[1] - else: - table = parts[0] - else: - dataset = self.get_dataset_for_topic_v1(topic, parser) - if dataset is None: - return None - - table = topic - if parser.topicsToTables: - topicregex_table_map: Dict[str, str] = dict( - self.get_list(parser.topicsToTables) # type: ignore - ) - from java.util.regex import Pattern - - for pattern, tbl in topicregex_table_map.items(): - patternMatcher = Pattern.compile(pattern).matcher(topic) - if patternMatcher.matches(): - table = tbl - break - - if parser.sanitizeTopics: - table = self.sanitize_table_name(table) - return f"{dataset}.{table}" - - def apply_transformations( - self, topic: str, transforms: List[Dict[str, str]] - ) -> str: - for transform in transforms: - if transform["type"] == "org.apache.kafka.connect.transforms.RegexRouter": - regex = transform["regex"] - replacement = transform["replacement"] - pattern = re.compile(regex) - if pattern.match(topic): - topic = pattern.sub(replacement, topic, count=1) - return topic - - def _extract_lineages(self): - lineages: List[KafkaConnectLineage] = list() - parser = self.get_parser(self.connector_manifest) - if not parser: - return lineages - target_platform = parser.target_platform - project = parser.project - transforms = parser.transforms - self.connector_manifest.flow_property_bag = self.connector_manifest.config - # Mask/Remove properties that may reveal credentials - if "keyfile" in self.connector_manifest.flow_property_bag: - del self.connector_manifest.flow_property_bag["keyfile"] - - for topic in self.connector_manifest.topic_names: - transformed_topic = self.apply_transformations(topic, transforms) - dataset_table = self.get_dataset_table_for_topic(transformed_topic, parser) - if dataset_table is None: 
- self.report.warning( - "Could not find target dataset for topic, please check your connector configuration" - f"{self.connector_manifest.name} : {transformed_topic} ", - ) - continue - target_dataset = f"{project}.{dataset_table}" - - lineages.append( - KafkaConnectLineage( - source_dataset=transformed_topic, - source_platform=KAFKA, - target_dataset=target_dataset, - target_platform=target_platform, - ) - ) - self.connector_manifest.lineages = lineages - return - - -@dataclass -class SnowflakeSinkConnector: - connector_manifest: ConnectorManifest - report: KafkaConnectSourceReport - - def __init__( - self, connector_manifest: ConnectorManifest, report: KafkaConnectSourceReport - ) -> None: - self.connector_manifest = connector_manifest - self.report = report - self._extract_lineages() - - @dataclass - class SnowflakeParser: - database_name: str - schema_name: str - topics_to_tables: Dict[str, str] - - def get_table_name_from_topic_name(self, topic_name: str) -> str: - """ - This function converts the topic name to a valid Snowflake table name using some rules. - Refer below link for more info - https://docs.snowflake.com/en/user-guide/kafka-connector-overview#target-tables-for-kafka-topics - """ - table_name = re.sub("[^a-zA-Z0-9_]", "_", topic_name) - if re.match("^[^a-zA-Z_].*", table_name): - table_name = "_" + table_name - # Connector may append original topic's hash code as suffix for conflict resolution - # if generated table names for 2 topics are similar. This corner case is not handled here. - # Note that Snowflake recommends to choose topic names that follow the rules for - # Snowflake identifier names so this case is not recommended by snowflake. - return table_name - - def get_parser( - self, - connector_manifest: ConnectorManifest, - ) -> SnowflakeParser: - database_name = connector_manifest.config["snowflake.database.name"] - schema_name = connector_manifest.config["snowflake.schema.name"] - - # Fetch user provided topic to table map - provided_topics_to_tables: Dict[str, str] = {} - if connector_manifest.config.get("snowflake.topic2table.map"): - for each in connector_manifest.config["snowflake.topic2table.map"].split( - "," - ): - topic, table = each.split(":") - provided_topics_to_tables[topic.strip()] = table.strip() - - topics_to_tables: Dict[str, str] = {} - # Extract lineage for only those topics whose data ingestion started - for topic in connector_manifest.topic_names: - if topic in provided_topics_to_tables: - # If user provided which table to get mapped with this topic - topics_to_tables[topic] = provided_topics_to_tables[topic] - else: - # Else connector converts topic name to a valid Snowflake table name. 
- topics_to_tables[topic] = self.get_table_name_from_topic_name(topic) - - return self.SnowflakeParser( - database_name=database_name, - schema_name=schema_name, - topics_to_tables=topics_to_tables, - ) - - def _extract_lineages(self): - self.connector_manifest.flow_property_bag = self.connector_manifest.config - - # For all snowflake sink connector properties, refer below link - # https://docs.snowflake.com/en/user-guide/kafka-connector-install#configuring-the-kafka-connector - # remove private keys, secrets from properties - secret_properties = [ - "snowflake.private.key", - "snowflake.private.key.passphrase", - "value.converter.basic.auth.user.info", - ] - for k in secret_properties: - if k in self.connector_manifest.flow_property_bag: - del self.connector_manifest.flow_property_bag[k] - - lineages: List[KafkaConnectLineage] = list() - parser = self.get_parser(self.connector_manifest) - - for topic, table in parser.topics_to_tables.items(): - target_dataset = f"{parser.database_name}.{parser.schema_name}.{table}" - lineages.append( - KafkaConnectLineage( - source_dataset=topic, - source_platform=KAFKA, - target_dataset=target_dataset, - target_platform="snowflake", - ) - ) - - self.connector_manifest.lineages = lineages - return - - -@dataclass -class ConfluentS3SinkConnector: - connector_manifest: ConnectorManifest - - def __init__( - self, connector_manifest: ConnectorManifest, report: KafkaConnectSourceReport - ) -> None: - self.connector_manifest = connector_manifest - self.report = report - self._extract_lineages() - - @dataclass - class S3SinkParser: - target_platform: str - bucket: str - topics_dir: str - topics: Iterable[str] - - def _get_parser(self, connector_manifest: ConnectorManifest) -> S3SinkParser: - # https://docs.confluent.io/kafka-connectors/s3-sink/current/configuration_options.html#s3 - bucket = connector_manifest.config.get("s3.bucket.name") - if not bucket: - raise ValueError( - "Could not find 's3.bucket.name' in connector configuration" - ) - - # https://docs.confluent.io/kafka-connectors/s3-sink/current/configuration_options.html#storage - topics_dir = connector_manifest.config.get("topics.dir", "topics") - - return self.S3SinkParser( - target_platform="s3", - bucket=bucket, - topics_dir=topics_dir, - topics=connector_manifest.topic_names, - ) - - def _extract_lineages(self): - self.connector_manifest.flow_property_bag = self.connector_manifest.config - - # remove keys, secrets from properties - secret_properties = [ - "aws.access.key.id", - "aws.secret.access.key", - "s3.sse.customer.key", - "s3.proxy.password", - ] - for k in secret_properties: - if k in self.connector_manifest.flow_property_bag: - del self.connector_manifest.flow_property_bag[k] - - try: - parser = self._get_parser(self.connector_manifest) - - lineages: List[KafkaConnectLineage] = list() - for topic in parser.topics: - target_dataset = f"{parser.bucket}/{parser.topics_dir}/{topic}" - - lineages.append( - KafkaConnectLineage( - source_dataset=topic, - source_platform="kafka", - target_dataset=target_dataset, - target_platform=parser.target_platform, - ) - ) - self.connector_manifest.lineages = lineages - except Exception as e: - self.report.warning( - "Error resolving lineage for connector", - self.connector_manifest.name, - exc=e, - ) - - return - - -def transform_connector_config( - connector_config: Dict, provided_configs: List[ProvidedConfig] -) -> None: - """This method will update provided configs in connector config values, if any""" - lookupsByProvider = {} - for pconfig in 
provided_configs: - lookupsByProvider[f"${{{pconfig.provider}:{pconfig.path_key}}}"] = pconfig.value - for k, v in connector_config.items(): - for key, value in lookupsByProvider.items(): - if key in v: - connector_config[k] = connector_config[k].replace(key, value) - - -@platform_name("Kafka Connect") -@config_class(KafkaConnectSourceConfig) -@support_status(SupportStatus.CERTIFIED) -@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") -@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") -@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") -class KafkaConnectSource(StatefulIngestionSourceBase): - config: KafkaConnectSourceConfig - report: KafkaConnectSourceReport - platform: str = "kafka-connect" - - def __init__(self, config: KafkaConnectSourceConfig, ctx: PipelineContext): - super().__init__(config, ctx) - self.config = config - self.report = KafkaConnectSourceReport() - self.session = requests.Session() - self.session.headers.update( - { - "Accept": "application/json", - "Content-Type": "application/json", - } - ) - - # Test the connection - if self.config.username is not None and self.config.password is not None: - logger.info( - f"Connecting to {self.config.connect_uri} with Authentication..." - ) - self.session.auth = (self.config.username, self.config.password) - - test_response = self.session.get(f"{self.config.connect_uri}/connectors") - test_response.raise_for_status() - logger.info(f"Connection to {self.config.connect_uri} is ok") - if not jpype.isJVMStarted(): - jpype.startJVM() - - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: - config = KafkaConnectSourceConfig.parse_obj(config_dict) - return cls(config, ctx) - - def get_connectors_manifest(self) -> List[ConnectorManifest]: - """Get Kafka Connect connectors manifest using REST API. - Enrich with lineages metadata. 
- """ - connectors_manifest = list() - - connector_response = self.session.get( - f"{self.config.connect_uri}/connectors", - ) - - payload = connector_response.json() - - for connector_name in payload: - connector_url = f"{self.config.connect_uri}/connectors/{connector_name}" - connector_manifest = self._get_connector_manifest( - connector_name, connector_url - ) - if ( - connector_manifest is None - or not self.config.connector_patterns.allowed(connector_manifest.name) - ): - self.report.report_dropped(connector_name) - continue - - if self.config.provided_configs: - transform_connector_config( - connector_manifest.config, self.config.provided_configs - ) - # Initialize connector lineages - connector_manifest.lineages = list() - connector_manifest.url = connector_url - - connector_manifest.topic_names = self._get_connector_topics(connector_name) - - # Populate Source Connector metadata - if connector_manifest.type == SOURCE: - connector_manifest.tasks = self._get_connector_tasks(connector_name) - - # JDBC source connector lineages - if connector_manifest.config.get(CONNECTOR_CLASS).__eq__( - "io.confluent.connect.jdbc.JdbcSourceConnector" - ): - connector_manifest = ConfluentJDBCSourceConnector( - connector_manifest=connector_manifest, - config=self.config, - report=self.report, - ).connector_manifest - elif connector_manifest.config.get(CONNECTOR_CLASS, "").startswith( - "io.debezium.connector" - ): - connector_manifest = DebeziumSourceConnector( - connector_manifest=connector_manifest, - config=self.config, - report=self.report, - ).connector_manifest - elif ( - connector_manifest.config.get(CONNECTOR_CLASS, "") - == "com.mongodb.kafka.connect.MongoSourceConnector" - ): - connector_manifest = MongoSourceConnector( - connector_manifest=connector_manifest, config=self.config - ).connector_manifest - else: - # Find the target connector object in the list, or log an error if unknown. - target_connector = None - for connector in self.config.generic_connectors: - if connector.connector_name == connector_manifest.name: - target_connector = connector - break - if not target_connector: - logger.warning( - f"Detected undefined connector {connector_manifest.name}, which is not in the customized connector list. Please refer to Kafka Connect ingestion recipe to define this customized connector." - ) - continue - - for topic in connector_manifest.topic_names: - lineage = KafkaConnectLineage( - source_dataset=target_connector.source_dataset, - source_platform=target_connector.source_platform, - target_dataset=topic, - target_platform=KAFKA, - ) - - connector_manifest.lineages.append(lineage) - - if connector_manifest.type == SINK: - if connector_manifest.config.get(CONNECTOR_CLASS).__eq__( - "com.wepay.kafka.connect.bigquery.BigQuerySinkConnector" - ): - connector_manifest = BigQuerySinkConnector( - connector_manifest=connector_manifest, report=self.report - ).connector_manifest - elif connector_manifest.config.get("connector.class").__eq__( - "io.confluent.connect.s3.S3SinkConnector" - ): - connector_manifest = ConfluentS3SinkConnector( - connector_manifest=connector_manifest, report=self.report - ).connector_manifest - elif connector_manifest.config.get("connector.class").__eq__( - "com.snowflake.kafka.connector.SnowflakeSinkConnector" - ): - connector_manifest = SnowflakeSinkConnector( - connector_manifest=connector_manifest, report=self.report - ).connector_manifest - else: - self.report.report_dropped(connector_manifest.name) - logger.warning( - f"Skipping connector {connector_manifest.name}. 
Lineage for Connector not yet implemented" - ) - pass - - connectors_manifest.append(connector_manifest) - - return connectors_manifest - - def _get_connector_manifest( - self, connector_name: str, connector_url: str - ) -> Optional[ConnectorManifest]: - try: - connector_response = self.session.get(connector_url) - connector_response.raise_for_status() - except Exception as e: - self.report.warning( - "Failed to get connector details", connector_name, exc=e - ) - return None - manifest = connector_response.json() - connector_manifest = ConnectorManifest(**manifest) - return connector_manifest - - def _get_connector_tasks(self, connector_name: str) -> dict: - try: - response = self.session.get( - f"{self.config.connect_uri}/connectors/{connector_name}/tasks", - ) - response.raise_for_status() - except Exception as e: - self.report.warning( - "Error getting connector tasks", context=connector_name, exc=e - ) - return {} - - return response.json() - - def _get_connector_topics(self, connector_name: str) -> List[str]: - try: - response = self.session.get( - f"{self.config.connect_uri}/connectors/{connector_name}/topics", - ) - response.raise_for_status() - except Exception as e: - self.report.warning( - "Error getting connector topics", context=connector_name, exc=e - ) - return [] - - return response.json()[connector_name]["topics"] - - def construct_flow_workunit(self, connector: ConnectorManifest) -> MetadataWorkUnit: - connector_name = connector.name - connector_type = connector.type - connector_class = connector.config.get(CONNECTOR_CLASS) - flow_property_bag = connector.flow_property_bag - # connector_url = connector.url # NOTE: this will expose connector credential when used - flow_urn = builder.make_data_flow_urn( - self.platform, - connector_name, - self.config.env, - self.config.platform_instance, - ) - - return MetadataChangeProposalWrapper( - entityUrn=flow_urn, - aspect=models.DataFlowInfoClass( - name=connector_name, - description=f"{connector_type.capitalize()} connector using `{connector_class}` plugin.", - customProperties=flow_property_bag, - # externalUrl=connector_url, # NOTE: this will expose connector credential when used - ), - ).as_workunit() - - def construct_job_workunits( - self, connector: ConnectorManifest - ) -> Iterable[MetadataWorkUnit]: - connector_name = connector.name - flow_urn = builder.make_data_flow_urn( - self.platform, - connector_name, - self.config.env, - self.config.platform_instance, - ) - - lineages = connector.lineages - if lineages: - for lineage in lineages: - source_dataset = lineage.source_dataset - source_platform = lineage.source_platform - target_dataset = lineage.target_dataset - target_platform = lineage.target_platform - job_property_bag = lineage.job_property_bag - - source_platform_instance = get_platform_instance( - self.config, connector_name, source_platform - ) - target_platform_instance = get_platform_instance( - self.config, connector_name, target_platform - ) - - job_id = self.get_job_id(lineage, connector, self.config) - job_urn = builder.make_data_job_urn_with_flow(flow_urn, job_id) - - inlets = ( - [ - self.make_lineage_dataset_urn( - source_platform, source_dataset, source_platform_instance - ) - ] - if source_dataset - else [] - ) - outlets = [ - self.make_lineage_dataset_urn( - target_platform, target_dataset, target_platform_instance - ) - ] - - yield MetadataChangeProposalWrapper( - entityUrn=job_urn, - aspect=models.DataJobInfoClass( - name=f"{connector_name}:{job_id}", - type="COMMAND", - 
customProperties=job_property_bag, - ), - ).as_workunit() - - yield MetadataChangeProposalWrapper( - entityUrn=job_urn, - aspect=models.DataJobInputOutputClass( - inputDatasets=inlets, - outputDatasets=outlets, - ), - ).as_workunit() - - def get_job_id( - self, - lineage: KafkaConnectLineage, - connector: ConnectorManifest, - config: KafkaConnectSourceConfig, - ) -> str: - connector_class = connector.config.get(CONNECTOR_CLASS) - - # Note - This block is only to maintain backward compatibility of Job URN - if ( - connector_class - and connector.type == SOURCE - and ( - "JdbcSourceConnector" in connector_class - or connector_class.startswith("io.debezium.connector") - ) - and lineage.source_dataset - and config.connect_to_platform_map - and config.connect_to_platform_map.get(connector.name) - and config.connect_to_platform_map[connector.name].get( - lineage.source_platform - ) - ): - return f"{config.connect_to_platform_map[connector.name][lineage.source_platform]}.{lineage.source_dataset}" - - return ( - lineage.source_dataset - if lineage.source_dataset - else f"unknown_source.{lineage.target_dataset}" - ) - - def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: - return [ - *super().get_workunit_processors(), - StaleEntityRemovalHandler.create( - self, self.config, self.ctx - ).workunit_processor, - ] - - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: - connectors_manifest = self.get_connectors_manifest() - for connector in connectors_manifest: - name = connector.name - - yield self.construct_flow_workunit(connector) - yield from self.construct_job_workunits(connector) - self.report.report_connector_scanned(name) - - def get_report(self) -> KafkaConnectSourceReport: - return self.report - - def make_lineage_dataset_urn( - self, platform: str, name: str, platform_instance: Optional[str] - ) -> str: - if self.config.convert_lineage_urns_to_lowercase: - name = name.lower() - - return builder.make_dataset_urn_with_platform_instance( - platform, name, platform_instance, self.config.env - ) - - -# TODO: Find a more automated way to discover new platforms with 3 level naming hierarchy. 
-def has_three_level_hierarchy(platform: str) -> bool: - return platform in ["postgres", "trino", "redshift", "snowflake"] diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/common.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/common.py new file mode 100644 index 0000000000000..36f6a96c0d408 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/common.py @@ -0,0 +1,202 @@ +import logging +from dataclasses import dataclass, field +from typing import Dict, Iterable, List, Optional + +from pydantic.fields import Field + +from datahub.configuration.common import AllowDenyPattern, ConfigModel +from datahub.configuration.source_common import ( + DatasetLineageProviderConfigBase, + PlatformInstanceConfigMixin, +) +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityRemovalSourceReport, + StatefulStaleMetadataRemovalConfig, +) +from datahub.ingestion.source.state.stateful_ingestion_base import ( + StatefulIngestionConfigBase, +) + +logger = logging.getLogger(__name__) + +KAFKA = "kafka" +SOURCE = "source" +SINK = "sink" +CONNECTOR_CLASS = "connector.class" + + +class ProvidedConfig(ConfigModel): + provider: str + path_key: str + value: str + + +class GenericConnectorConfig(ConfigModel): + connector_name: str + source_dataset: str + source_platform: str + + +class KafkaConnectSourceConfig( + PlatformInstanceConfigMixin, + DatasetLineageProviderConfigBase, + StatefulIngestionConfigBase, +): + # See the Connect REST Interface for details + # https://docs.confluent.io/platform/current/connect/references/restapi.html# + connect_uri: str = Field( + default="http://localhost:8083/", description="URI to connect to." + ) + username: Optional[str] = Field(default=None, description="Kafka Connect username.") + password: Optional[str] = Field(default=None, description="Kafka Connect password.") + cluster_name: Optional[str] = Field( + default="connect-cluster", description="Cluster to ingest from." + ) + # convert lineage dataset's urns to lowercase + convert_lineage_urns_to_lowercase: bool = Field( + default=False, + description="Whether to convert the urns of ingested lineage dataset to lowercase", + ) + connector_patterns: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="regex patterns for connectors to filter for ingestion.", + ) + provided_configs: Optional[List[ProvidedConfig]] = Field( + default=None, description="Provided Configurations" + ) + connect_to_platform_map: Optional[Dict[str, Dict[str, str]]] = Field( + default=None, + description='Platform instance mapping when multiple instances for a platform is available. Entry for a platform should be in either `platform_instance_map` or `connect_to_platform_map`. e.g.`connect_to_platform_map: { "postgres-connector-finance-db": "postgres": "core_finance_instance" }`', + ) + platform_instance_map: Optional[Dict[str, str]] = Field( + default=None, + description='Platform instance mapping to use when constructing URNs. 
e.g.`platform_instance_map: { "hive": "warehouse" }`', + ) + generic_connectors: List[GenericConnectorConfig] = Field( + default=[], + description="Provide lineage graph for sources connectors other than Confluent JDBC Source Connector, Debezium Source Connector, and Mongo Source Connector", + ) + + stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None + + +@dataclass +class KafkaConnectSourceReport(StaleEntityRemovalSourceReport): + connectors_scanned: int = 0 + filtered: List[str] = field(default_factory=list) + + def report_connector_scanned(self, connector: str) -> None: + self.connectors_scanned += 1 + + def report_dropped(self, connector: str) -> None: + self.filtered.append(connector) + + +@dataclass +class KafkaConnectLineage: + """Class to store Kafka Connect lineage mapping, Each instance is potential DataJob""" + + source_platform: str + target_dataset: str + target_platform: str + job_property_bag: Optional[Dict[str, str]] = None + source_dataset: Optional[str] = None + + +@dataclass +class ConnectorManifest: + """Each instance is potential DataFlow""" + + name: str + type: str + config: Dict + tasks: Dict + url: Optional[str] = None + flow_property_bag: Optional[Dict[str, str]] = None + lineages: List[KafkaConnectLineage] = field(default_factory=list) + topic_names: Iterable[str] = field(default_factory=list) + + +def remove_prefix(text: str, prefix: str) -> str: + if text.startswith(prefix): + index = len(prefix) + return text[index:] + return text + + +def unquote( + string: str, leading_quote: str = '"', trailing_quote: Optional[str] = None +) -> str: + """ + If string starts and ends with a quote, unquote it + """ + trailing_quote = trailing_quote if trailing_quote else leading_quote + if string.startswith(leading_quote) and string.endswith(trailing_quote): + string = string[1:-1] + return string + + +def get_dataset_name( + database_name: Optional[str], + source_table: str, +) -> str: + if database_name: + dataset_name = database_name + "." + source_table + else: + dataset_name = source_table + + return dataset_name + + +def get_platform_instance( + config: KafkaConnectSourceConfig, connector_name: str, platform: str +) -> Optional[str]: + instance_name = None + if ( + config.connect_to_platform_map + and config.connect_to_platform_map.get(connector_name) + and config.connect_to_platform_map[connector_name].get(platform) + ): + instance_name = config.connect_to_platform_map[connector_name][platform] + if config.platform_instance_map and config.platform_instance_map.get(platform): + logger.warning( + f"Same source platform {platform} configured in both platform_instance_map and connect_to_platform_map." + "Will prefer connector specific platform instance from connect_to_platform_map." 
+ ) + elif config.platform_instance_map and config.platform_instance_map.get(platform): + instance_name = config.platform_instance_map[platform] + logger.info( + f"Instance name assigned is: {instance_name} for Connector Name {connector_name} and platform {platform}" + ) + return instance_name + + +def transform_connector_config( + connector_config: Dict, provided_configs: List[ProvidedConfig] +) -> None: + """This method will update provided configs in connector config values, if any""" + lookupsByProvider = {} + for pconfig in provided_configs: + lookupsByProvider[f"${{{pconfig.provider}:{pconfig.path_key}}}"] = pconfig.value + for k, v in connector_config.items(): + for key, value in lookupsByProvider.items(): + if key in v: + connector_config[k] = connector_config[k].replace(key, value) + + +# TODO: Find a more automated way to discover new platforms with 3 level naming hierarchy. +def has_three_level_hierarchy(platform: str) -> bool: + return platform in ["postgres", "trino", "redshift", "snowflake"] + + +@dataclass +class BaseConnector: + connector_manifest: ConnectorManifest + config: KafkaConnectSourceConfig + report: KafkaConnectSourceReport + + def extract_lineages(self) -> List[KafkaConnectLineage]: + return [] + + def extract_flow_property_bag(self) -> Optional[Dict[str, str]]: + return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py new file mode 100644 index 0000000000000..fa6b614c4b52a --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py @@ -0,0 +1,367 @@ +import logging +from typing import Iterable, List, Optional, Type + +import jpype +import jpype.imports +import requests + +import datahub.emitter.mce_builder as builder +import datahub.metadata.schema_classes as models +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.decorators import ( + SourceCapability, + SupportStatus, + capability, + config_class, + platform_name, + support_status, +) +from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.kafka_connect.common import ( + CONNECTOR_CLASS, + SINK, + SOURCE, + BaseConnector, + ConnectorManifest, + KafkaConnectLineage, + KafkaConnectSourceConfig, + KafkaConnectSourceReport, + get_platform_instance, + transform_connector_config, +) +from datahub.ingestion.source.kafka_connect.sink_connectors import ( + BIGQUERY_SINK_CONNECTOR_CLASS, + S3_SINK_CONNECTOR_CLASS, + SNOWFLAKE_SINK_CONNECTOR_CLASS, + BigQuerySinkConnector, + ConfluentS3SinkConnector, + SnowflakeSinkConnector, +) +from datahub.ingestion.source.kafka_connect.source_connectors import ( + DEBEZIUM_SOURCE_CONNECTOR_PREFIX, + JDBC_SOURCE_CONNECTOR_CLASS, + MONGO_SOURCE_CONNECTOR_CLASS, + ConfigDrivenSourceConnector, + ConfluentJDBCSourceConnector, + DebeziumSourceConnector, + MongoSourceConnector, +) +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityRemovalHandler, +) +from datahub.ingestion.source.state.stateful_ingestion_base import ( + StatefulIngestionSourceBase, +) + +logger = logging.getLogger(__name__) + + +@platform_name("Kafka Connect") +@config_class(KafkaConnectSourceConfig) +@support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") 
+@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") +class KafkaConnectSource(StatefulIngestionSourceBase): + config: KafkaConnectSourceConfig + report: KafkaConnectSourceReport + platform: str = "kafka-connect" + + def __init__(self, config: KafkaConnectSourceConfig, ctx: PipelineContext): + super().__init__(config, ctx) + self.config = config + self.report = KafkaConnectSourceReport() + self.session = requests.Session() + self.session.headers.update( + { + "Accept": "application/json", + "Content-Type": "application/json", + } + ) + + # Test the connection + if self.config.username is not None and self.config.password is not None: + logger.info( + f"Connecting to {self.config.connect_uri} with Authentication..." + ) + self.session.auth = (self.config.username, self.config.password) + + test_response = self.session.get(f"{self.config.connect_uri}/connectors") + test_response.raise_for_status() + logger.info(f"Connection to {self.config.connect_uri} is ok") + if not jpype.isJVMStarted(): + jpype.startJVM() + + @classmethod + def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: + config = KafkaConnectSourceConfig.parse_obj(config_dict) + return cls(config, ctx) + + def get_connectors_manifest(self) -> Iterable[ConnectorManifest]: + """Get Kafka Connect connectors manifest using REST API. + Enrich with lineages metadata. + """ + + connector_response = self.session.get( + f"{self.config.connect_uri}/connectors", + ) + + payload = connector_response.json() + + for connector_name in payload: + connector_url = f"{self.config.connect_uri}/connectors/{connector_name}" + connector_manifest = self._get_connector_manifest( + connector_name, connector_url + ) + if ( + connector_manifest is None + or not self.config.connector_patterns.allowed(connector_manifest.name) + ): + self.report.report_dropped(connector_name) + continue + + if self.config.provided_configs: + transform_connector_config( + connector_manifest.config, self.config.provided_configs + ) + connector_manifest.url = connector_url + connector_manifest.topic_names = self._get_connector_topics(connector_name) + connector_class_value = connector_manifest.config.get(CONNECTOR_CLASS) or "" + + class_type: Type[BaseConnector] = BaseConnector + + # Populate Source Connector metadata + if connector_manifest.type == SOURCE: + connector_manifest.tasks = self._get_connector_tasks(connector_name) + + # JDBC source connector lineages + if connector_class_value == JDBC_SOURCE_CONNECTOR_CLASS: + class_type = ConfluentJDBCSourceConnector + elif connector_class_value.startswith(DEBEZIUM_SOURCE_CONNECTOR_PREFIX): + class_type = DebeziumSourceConnector + elif connector_class_value == MONGO_SOURCE_CONNECTOR_CLASS: + class_type = MongoSourceConnector + elif any( + [ + connector.connector_name == connector_manifest.name + for connector in self.config.generic_connectors + ] + ): + class_type = ConfigDrivenSourceConnector + else: + self.report.report_dropped(connector_manifest.name) + self.report.warning( + "Lineage for Source Connector not supported. 
" + "Please refer to Kafka Connect docs to use `generic_connectors` config.", + context=f"{connector_manifest.name} of type {connector_class_value}", + ) + continue + elif connector_manifest.type == SINK: + if connector_class_value == BIGQUERY_SINK_CONNECTOR_CLASS: + class_type = BigQuerySinkConnector + elif connector_class_value == S3_SINK_CONNECTOR_CLASS: + class_type = ConfluentS3SinkConnector + elif connector_class_value == SNOWFLAKE_SINK_CONNECTOR_CLASS: + class_type = SnowflakeSinkConnector + else: + self.report.report_dropped(connector_manifest.name) + self.report.warning( + "Lineage for Sink Connector not supported.", + context=f"{connector_manifest.name} of type {connector_class_value}", + ) + + connector_class = class_type(connector_manifest, self.config, self.report) + connector_manifest.lineages = connector_class.extract_lineages() + connector_manifest.flow_property_bag = ( + connector_class.extract_flow_property_bag() + ) + + yield connector_manifest + + def _get_connector_manifest( + self, connector_name: str, connector_url: str + ) -> Optional[ConnectorManifest]: + try: + connector_response = self.session.get(connector_url) + connector_response.raise_for_status() + except Exception as e: + self.report.warning( + "Failed to get connector details", connector_name, exc=e + ) + return None + manifest = connector_response.json() + connector_manifest = ConnectorManifest(**manifest) + return connector_manifest + + def _get_connector_tasks(self, connector_name: str) -> dict: + try: + response = self.session.get( + f"{self.config.connect_uri}/connectors/{connector_name}/tasks", + ) + response.raise_for_status() + except Exception as e: + self.report.warning( + "Error getting connector tasks", context=connector_name, exc=e + ) + return {} + + return response.json() + + def _get_connector_topics(self, connector_name: str) -> List[str]: + try: + response = self.session.get( + f"{self.config.connect_uri}/connectors/{connector_name}/topics", + ) + response.raise_for_status() + except Exception as e: + self.report.warning( + "Error getting connector topics", context=connector_name, exc=e + ) + return [] + + return response.json()[connector_name]["topics"] + + def construct_flow_workunit(self, connector: ConnectorManifest) -> MetadataWorkUnit: + connector_name = connector.name + connector_type = connector.type + connector_class = connector.config.get(CONNECTOR_CLASS) + flow_property_bag = connector.flow_property_bag + # connector_url = connector.url # NOTE: this will expose connector credential when used + flow_urn = builder.make_data_flow_urn( + self.platform, + connector_name, + self.config.env, + self.config.platform_instance, + ) + + return MetadataChangeProposalWrapper( + entityUrn=flow_urn, + aspect=models.DataFlowInfoClass( + name=connector_name, + description=f"{connector_type.capitalize()} connector using `{connector_class}` plugin.", + customProperties=flow_property_bag, + # externalUrl=connector_url, # NOTE: this will expose connector credential when used + ), + ).as_workunit() + + def construct_job_workunits( + self, connector: ConnectorManifest + ) -> Iterable[MetadataWorkUnit]: + connector_name = connector.name + flow_urn = builder.make_data_flow_urn( + self.platform, + connector_name, + self.config.env, + self.config.platform_instance, + ) + + lineages = connector.lineages + if lineages: + for lineage in lineages: + source_dataset = lineage.source_dataset + source_platform = lineage.source_platform + target_dataset = lineage.target_dataset + target_platform = 
lineage.target_platform + job_property_bag = lineage.job_property_bag + + source_platform_instance = get_platform_instance( + self.config, connector_name, source_platform + ) + target_platform_instance = get_platform_instance( + self.config, connector_name, target_platform + ) + + job_id = self.get_job_id(lineage, connector, self.config) + job_urn = builder.make_data_job_urn_with_flow(flow_urn, job_id) + + inlets = ( + [ + self.make_lineage_dataset_urn( + source_platform, source_dataset, source_platform_instance + ) + ] + if source_dataset + else [] + ) + outlets = [ + self.make_lineage_dataset_urn( + target_platform, target_dataset, target_platform_instance + ) + ] + + yield MetadataChangeProposalWrapper( + entityUrn=job_urn, + aspect=models.DataJobInfoClass( + name=f"{connector_name}:{job_id}", + type="COMMAND", + customProperties=job_property_bag, + ), + ).as_workunit() + + yield MetadataChangeProposalWrapper( + entityUrn=job_urn, + aspect=models.DataJobInputOutputClass( + inputDatasets=inlets, + outputDatasets=outlets, + ), + ).as_workunit() + + def get_job_id( + self, + lineage: KafkaConnectLineage, + connector: ConnectorManifest, + config: KafkaConnectSourceConfig, + ) -> str: + connector_class = connector.config.get(CONNECTOR_CLASS) + + # Note - This block is only to maintain backward compatibility of Job URN + if ( + connector_class + and connector.type == SOURCE + and ( + "JdbcSourceConnector" in connector_class + or connector_class.startswith("io.debezium.connector") + ) + and lineage.source_dataset + and config.connect_to_platform_map + and config.connect_to_platform_map.get(connector.name) + and config.connect_to_platform_map[connector.name].get( + lineage.source_platform + ) + ): + return f"{config.connect_to_platform_map[connector.name][lineage.source_platform]}.{lineage.source_dataset}" + + return ( + lineage.source_dataset + if lineage.source_dataset + else f"unknown_source.{lineage.target_dataset}" + ) + + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: + return [ + *super().get_workunit_processors(), + StaleEntityRemovalHandler.create( + self, self.config, self.ctx + ).workunit_processor, + ] + + def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + for connector in self.get_connectors_manifest(): + yield self.construct_flow_workunit(connector) + yield from self.construct_job_workunits(connector) + self.report.report_connector_scanned(connector.name) + + def get_report(self) -> KafkaConnectSourceReport: + return self.report + + def make_lineage_dataset_urn( + self, platform: str, name: str, platform_instance: Optional[str] + ) -> str: + if self.config.convert_lineage_urns_to_lowercase: + name = name.lower() + + return builder.make_dataset_urn_with_platform_instance( + platform, name, platform_instance, self.config.env + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py new file mode 100644 index 0000000000000..2790460c8e601 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py @@ -0,0 +1,341 @@ +import re +from dataclasses import dataclass +from typing import Dict, Iterable, List, Optional, Tuple + +from datahub.ingestion.source.kafka_connect.common import ( + KAFKA, + BaseConnector, + ConnectorManifest, + KafkaConnectLineage, +) + + +@dataclass +class ConfluentS3SinkConnector(BaseConnector): + @dataclass + class S3SinkParser: + target_platform: str + 
bucket: str + topics_dir: str + topics: Iterable[str] + + def _get_parser(self, connector_manifest: ConnectorManifest) -> S3SinkParser: + # https://docs.confluent.io/kafka-connectors/s3-sink/current/configuration_options.html#s3 + bucket = connector_manifest.config.get("s3.bucket.name") + if not bucket: + raise ValueError( + "Could not find 's3.bucket.name' in connector configuration" + ) + + # https://docs.confluent.io/kafka-connectors/s3-sink/current/configuration_options.html#storage + topics_dir = connector_manifest.config.get("topics.dir", "topics") + + return self.S3SinkParser( + target_platform="s3", + bucket=bucket, + topics_dir=topics_dir, + topics=connector_manifest.topic_names, + ) + + def extract_flow_property_bag(self) -> Dict[str, str]: + # Mask/Remove properties that may reveal credentials + flow_property_bag = { + k: v + for k, v in self.connector_manifest.config.items() + if k + not in [ + "aws.access.key.id", + "aws.secret.access.key", + "s3.sse.customer.key", + "s3.proxy.password", + ] + } + return flow_property_bag + + def extract_lineages(self) -> List[KafkaConnectLineage]: + try: + parser = self._get_parser(self.connector_manifest) + + lineages: List[KafkaConnectLineage] = list() + for topic in parser.topics: + target_dataset = f"{parser.bucket}/{parser.topics_dir}/{topic}" + + lineages.append( + KafkaConnectLineage( + source_dataset=topic, + source_platform="kafka", + target_dataset=target_dataset, + target_platform=parser.target_platform, + ) + ) + return lineages + except Exception as e: + self.report.warning( + "Error resolving lineage for connector", + self.connector_manifest.name, + exc=e, + ) + + return [] + + +@dataclass +class SnowflakeSinkConnector(BaseConnector): + @dataclass + class SnowflakeParser: + database_name: str + schema_name: str + topics_to_tables: Dict[str, str] + + def get_table_name_from_topic_name(self, topic_name: str) -> str: + """ + This function converts the topic name to a valid Snowflake table name using some rules. + Refer below link for more info + https://docs.snowflake.com/en/user-guide/kafka-connector-overview#target-tables-for-kafka-topics + """ + table_name = re.sub("[^a-zA-Z0-9_]", "_", topic_name) + if re.match("^[^a-zA-Z_].*", table_name): + table_name = "_" + table_name + # Connector may append original topic's hash code as suffix for conflict resolution + # if generated table names for 2 topics are similar. This corner case is not handled here. + # Note that Snowflake recommends to choose topic names that follow the rules for + # Snowflake identifier names so this case is not recommended by snowflake. 
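+        # For example (illustrative): topic "2-user.events" is converted to the
+        # table name "_2_user_events" by the rules above.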
+ return table_name + + def get_parser( + self, + connector_manifest: ConnectorManifest, + ) -> SnowflakeParser: + database_name = connector_manifest.config["snowflake.database.name"] + schema_name = connector_manifest.config["snowflake.schema.name"] + + # Fetch user provided topic to table map + provided_topics_to_tables: Dict[str, str] = {} + if connector_manifest.config.get("snowflake.topic2table.map"): + for each in connector_manifest.config["snowflake.topic2table.map"].split( + "," + ): + topic, table = each.split(":") + provided_topics_to_tables[topic.strip()] = table.strip() + + topics_to_tables: Dict[str, str] = {} + # Extract lineage for only those topics whose data ingestion started + for topic in connector_manifest.topic_names: + if topic in provided_topics_to_tables: + # If user provided which table to get mapped with this topic + topics_to_tables[topic] = provided_topics_to_tables[topic] + else: + # Else connector converts topic name to a valid Snowflake table name. + topics_to_tables[topic] = self.get_table_name_from_topic_name(topic) + + return self.SnowflakeParser( + database_name=database_name, + schema_name=schema_name, + topics_to_tables=topics_to_tables, + ) + + def extract_flow_property_bag(self) -> Dict[str, str]: + # For all snowflake sink connector properties, refer below link + # https://docs.snowflake.com/en/user-guide/kafka-connector-install#configuring-the-kafka-connector + # remove private keys, secrets from properties + flow_property_bag = { + k: v + for k, v in self.connector_manifest.config.items() + if k + not in [ + "snowflake.private.key", + "snowflake.private.key.passphrase", + "value.converter.basic.auth.user.info", + ] + } + + return flow_property_bag + + def extract_lineages(self) -> List[KafkaConnectLineage]: + lineages: List[KafkaConnectLineage] = list() + parser = self.get_parser(self.connector_manifest) + + for topic, table in parser.topics_to_tables.items(): + target_dataset = f"{parser.database_name}.{parser.schema_name}.{table}" + lineages.append( + KafkaConnectLineage( + source_dataset=topic, + source_platform=KAFKA, + target_dataset=target_dataset, + target_platform="snowflake", + ) + ) + + return lineages + + +@dataclass +class BigQuerySinkConnector(BaseConnector): + @dataclass + class BQParser: + project: str + target_platform: str + sanitizeTopics: str + transforms: list + topicsToTables: Optional[str] = None + datasets: Optional[str] = None + defaultDataset: Optional[str] = None + version: str = "v1" + + def get_parser( + self, + connector_manifest: ConnectorManifest, + ) -> BQParser: + project = connector_manifest.config["project"] + sanitizeTopics = connector_manifest.config.get("sanitizeTopics", "false") + transform_names = ( + self.connector_manifest.config.get("transforms", "").split(",") + if self.connector_manifest.config.get("transforms") + else [] + ) + transforms = [] + for name in transform_names: + transform = {"name": name} + transforms.append(transform) + for key in self.connector_manifest.config.keys(): + if key.startswith(f"transforms.{name}."): + transform[ + key.replace(f"transforms.{name}.", "") + ] = self.connector_manifest.config[key] + + if "defaultDataset" in connector_manifest.config: + defaultDataset = connector_manifest.config["defaultDataset"] + return self.BQParser( + project=project, + defaultDataset=defaultDataset, + target_platform="bigquery", + sanitizeTopics=sanitizeTopics.lower() == "true", + version="v2", + transforms=transforms, + ) + else: + # version 1.6.x and similar configs supported + datasets = 
connector_manifest.config["datasets"] + topicsToTables = connector_manifest.config.get("topicsToTables") + + return self.BQParser( + project=project, + topicsToTables=topicsToTables, + datasets=datasets, + target_platform="bigquery", + sanitizeTopics=sanitizeTopics.lower() == "true", + transforms=transforms, + ) + + def get_list(self, property: str) -> Iterable[Tuple[str, str]]: + entries = property.split(",") + for entry in entries: + key, val = entry.rsplit("=") + yield (key.strip(), val.strip()) + + def get_dataset_for_topic_v1(self, topic: str, parser: BQParser) -> Optional[str]: + topicregex_dataset_map: Dict[str, str] = dict(self.get_list(parser.datasets)) # type: ignore + from java.util.regex import Pattern + + for pattern, dataset in topicregex_dataset_map.items(): + patternMatcher = Pattern.compile(pattern).matcher(topic) + if patternMatcher.matches(): + return dataset + return None + + def sanitize_table_name(self, table_name): + table_name = re.sub("[^a-zA-Z0-9_]", "_", table_name) + if re.match("^[^a-zA-Z_].*", table_name): + table_name = "_" + table_name + + return table_name + + def get_dataset_table_for_topic( + self, topic: str, parser: BQParser + ) -> Optional[str]: + if parser.version == "v2": + dataset = parser.defaultDataset + parts = topic.split(":") + if len(parts) == 2: + dataset = parts[0] + table = parts[1] + else: + table = parts[0] + else: + dataset = self.get_dataset_for_topic_v1(topic, parser) + if dataset is None: + return None + + table = topic + if parser.topicsToTables: + topicregex_table_map: Dict[str, str] = dict( + self.get_list(parser.topicsToTables) # type: ignore + ) + from java.util.regex import Pattern + + for pattern, tbl in topicregex_table_map.items(): + patternMatcher = Pattern.compile(pattern).matcher(topic) + if patternMatcher.matches(): + table = tbl + break + + if parser.sanitizeTopics: + table = self.sanitize_table_name(table) + return f"{dataset}.{table}" + + def apply_transformations( + self, topic: str, transforms: List[Dict[str, str]] + ) -> str: + for transform in transforms: + if transform["type"] == "org.apache.kafka.connect.transforms.RegexRouter": + regex = transform["regex"] + replacement = transform["replacement"] + pattern = re.compile(regex) + if pattern.match(topic): + topic = pattern.sub(replacement, topic, count=1) + return topic + + def extract_flow_property_bag(self) -> Dict[str, str]: + # Mask/Remove properties that may reveal credentials + flow_property_bag = { + k: v + for k, v in self.connector_manifest.config.items() + if k not in ["keyfile"] + } + + return flow_property_bag + + def extract_lineages(self) -> List[KafkaConnectLineage]: + lineages: List[KafkaConnectLineage] = list() + parser = self.get_parser(self.connector_manifest) + if not parser: + return lineages + target_platform = parser.target_platform + project = parser.project + transforms = parser.transforms + + for topic in self.connector_manifest.topic_names: + transformed_topic = self.apply_transformations(topic, transforms) + dataset_table = self.get_dataset_table_for_topic(transformed_topic, parser) + if dataset_table is None: + self.report.warning( + "Could not find target dataset for topic, please check your connector configuration" + f"{self.connector_manifest.name} : {transformed_topic} ", + ) + continue + target_dataset = f"{project}.{dataset_table}" + + lineages.append( + KafkaConnectLineage( + source_dataset=transformed_topic, + source_platform=KAFKA, + target_dataset=target_dataset, + target_platform=target_platform, + ) + ) + return lineages + + 
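+# Illustrative note (assumed config, not from the connector docs): with the v2 parser
+# above and a defaultDataset of "analytics", topic "orders" maps to "analytics.orders",
+# while a prefixed topic "staging:users" maps to "staging.users"; when sanitizeTopics
+# is enabled, characters outside [a-zA-Z0-9_] in the table name are replaced with "_".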
+BIGQUERY_SINK_CONNECTOR_CLASS = "com.wepay.kafka.connect.bigquery.BigQuerySinkConnector" +S3_SINK_CONNECTOR_CLASS = "io.confluent.connect.s3.S3SinkConnector" +SNOWFLAKE_SINK_CONNECTOR_CLASS = "com.snowflake.kafka.connector.SnowflakeSinkConnector" diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py new file mode 100644 index 0000000000000..7b3b6e551a0a1 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py @@ -0,0 +1,570 @@ +import logging +import re +from dataclasses import dataclass +from typing import Dict, Iterable, List, Optional, Tuple + +from sqlalchemy.engine.url import make_url + +from datahub.ingestion.source.kafka_connect.common import ( + CONNECTOR_CLASS, + KAFKA, + BaseConnector, + ConnectorManifest, + KafkaConnectLineage, + get_dataset_name, + has_three_level_hierarchy, + remove_prefix, + unquote, +) +from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( + get_platform_from_sqlalchemy_uri, +) + + +@dataclass +class ConfluentJDBCSourceConnector(BaseConnector): + REGEXROUTER = "org.apache.kafka.connect.transforms.RegexRouter" + KNOWN_TOPICROUTING_TRANSFORMS = [REGEXROUTER] + # https://kafka.apache.org/documentation/#connect_included_transformation + KAFKA_NONTOPICROUTING_TRANSFORMS = [ + "InsertField", + "InsertField$Key", + "InsertField$Value", + "ReplaceField", + "ReplaceField$Key", + "ReplaceField$Value", + "MaskField", + "MaskField$Key", + "MaskField$Value", + "ValueToKey", + "ValueToKey$Key", + "ValueToKey$Value", + "HoistField", + "HoistField$Key", + "HoistField$Value", + "ExtractField", + "ExtractField$Key", + "ExtractField$Value", + "SetSchemaMetadata", + "SetSchemaMetadata$Key", + "SetSchemaMetadata$Value", + "Flatten", + "Flatten$Key", + "Flatten$Value", + "Cast", + "Cast$Key", + "Cast$Value", + "HeadersFrom", + "HeadersFrom$Key", + "HeadersFrom$Value", + "TimestampConverter", + "Filter", + "InsertHeader", + "DropHeaders", + ] + # https://docs.confluent.io/platform/current/connect/transforms/overview.html + CONFLUENT_NONTOPICROUTING_TRANSFORMS = [ + "Drop", + "Drop$Key", + "Drop$Value", + "Filter", + "Filter$Key", + "Filter$Value", + "TombstoneHandler", + ] + KNOWN_NONTOPICROUTING_TRANSFORMS = ( + KAFKA_NONTOPICROUTING_TRANSFORMS + + [ + f"org.apache.kafka.connect.transforms.{t}" + for t in KAFKA_NONTOPICROUTING_TRANSFORMS + ] + + CONFLUENT_NONTOPICROUTING_TRANSFORMS + + [ + f"io.confluent.connect.transforms.{t}" + for t in CONFLUENT_NONTOPICROUTING_TRANSFORMS + ] + ) + + @dataclass + class JdbcParser: + db_connection_url: str + source_platform: str + database_name: str + topic_prefix: str + query: str + transforms: list + + def get_parser( + self, + connector_manifest: ConnectorManifest, + ) -> JdbcParser: + url = remove_prefix( + str(connector_manifest.config.get("connection.url")), "jdbc:" + ) + url_instance = make_url(url) + source_platform = get_platform_from_sqlalchemy_uri(str(url_instance)) + database_name = url_instance.database + assert database_name + db_connection_url = f"{url_instance.drivername}://{url_instance.host}:{url_instance.port}/{database_name}" + + topic_prefix = self.connector_manifest.config.get("topic.prefix", None) + + query = self.connector_manifest.config.get("query", None) + + transform_names = ( + self.connector_manifest.config.get("transforms", "").split(",") + if self.connector_manifest.config.get("transforms") + else [] + ) + + transforms = [] + for 
name in transform_names: + transform = {"name": name} + transforms.append(transform) + for key in self.connector_manifest.config.keys(): + if key.startswith(f"transforms.{name}."): + transform[ + key.replace(f"transforms.{name}.", "") + ] = self.connector_manifest.config[key] + + return self.JdbcParser( + db_connection_url, + source_platform, + database_name, + topic_prefix, + query, + transforms, + ) + + def default_get_lineages( + self, + topic_prefix: str, + database_name: str, + source_platform: str, + topic_names: Optional[Iterable[str]] = None, + include_source_dataset: bool = True, + ) -> List[KafkaConnectLineage]: + lineages: List[KafkaConnectLineage] = [] + if not topic_names: + topic_names = self.connector_manifest.topic_names + table_name_tuples: List[Tuple] = self.get_table_names() + for topic in topic_names: + # All good for NO_TRANSFORM or (SINGLE_TRANSFORM and KNOWN_NONTOPICROUTING_TRANSFORM) or (not SINGLE_TRANSFORM and all(KNOWN_NONTOPICROUTING_TRANSFORM)) + source_table: str = ( + remove_prefix(topic, topic_prefix) if topic_prefix else topic + ) + # include schema name for three-level hierarchies + if has_three_level_hierarchy(source_platform): + table_name_tuple: Tuple = next( + iter([t for t in table_name_tuples if t and t[-1] == source_table]), + (), + ) + if len(table_name_tuple) > 1: + source_table = f"{table_name_tuple[-2]}.{source_table}" + else: + include_source_dataset = False + self.report.warning( + "Could not find schema for table" + f"{self.connector_manifest.name} : {source_table}", + ) + dataset_name: str = get_dataset_name(database_name, source_table) + lineage = KafkaConnectLineage( + source_dataset=dataset_name if include_source_dataset else None, + source_platform=source_platform, + target_dataset=topic, + target_platform=KAFKA, + ) + lineages.append(lineage) + return lineages + + def get_table_names(self) -> List[Tuple]: + sep: str = "." 
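+        # Illustrative example: a task-reported table id of '"public"."users"' is
+        # split on "." and unquoted below into the (schema, table) tuple ("public", "users").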
+ leading_quote_char: str = '"' + trailing_quote_char: str = leading_quote_char + + table_ids: List[str] = [] + if self.connector_manifest.tasks: + table_ids = ( + ",".join( + [ + task["config"].get("tables") + for task in self.connector_manifest.tasks + ] + ) + ).split(",") + quote_method = self.connector_manifest.config.get( + "quote.sql.identifiers", "always" + ) + if ( + quote_method == "always" + and table_ids + and table_ids[0] + and table_ids[-1] + ): + leading_quote_char = table_ids[0][0] + trailing_quote_char = table_ids[-1][-1] + # This will only work for single character quotes + elif self.connector_manifest.config.get("table.whitelist"): + table_ids = self.connector_manifest.config.get("table.whitelist").split(",") # type: ignore + + # List of Tuple containing (schema, table) + tables: List[Tuple] = [ + ( + ( + unquote( + table_id.split(sep)[-2], leading_quote_char, trailing_quote_char + ) + if len(table_id.split(sep)) > 1 + else "" + ), + unquote( + table_id.split(sep)[-1], leading_quote_char, trailing_quote_char + ), + ) + for table_id in table_ids + ] + return tables + + def extract_flow_property_bag(self) -> Dict[str, str]: + flow_property_bag = { + k: v + for k, v in self.connector_manifest.config.items() + if k not in ["connection.password", "connection.user"] + } + + # Mask/Remove properties that may reveal credentials + flow_property_bag["connection.url"] = self.get_parser( + self.connector_manifest + ).db_connection_url + + return flow_property_bag + + def extract_lineages(self) -> List[KafkaConnectLineage]: + lineages: List[KafkaConnectLineage] = list() + parser = self.get_parser(self.connector_manifest) + source_platform = parser.source_platform + database_name = parser.database_name + query = parser.query + topic_prefix = parser.topic_prefix + transforms = parser.transforms + + logging.debug( + f"Extracting source platform: {source_platform} and database name: {database_name} from connection url " + ) + + if not self.connector_manifest.topic_names: + return lineages + + if query: + # Lineage source_table can be extracted by parsing query + for topic in self.connector_manifest.topic_names: + # default method - as per earlier implementation + dataset_name: str = get_dataset_name(database_name, topic) + + lineage = KafkaConnectLineage( + source_dataset=None, + source_platform=source_platform, + target_dataset=topic, + target_platform=KAFKA, + ) + lineages.append(lineage) + self.report.warning( + "Could not find input dataset, the connector has query configuration set", + self.connector_manifest.name, + ) + return lineages + + SINGLE_TRANSFORM = len(transforms) == 1 + NO_TRANSFORM = len(transforms) == 0 + UNKNOWN_TRANSFORM = any( + [ + transform["type"] + not in self.KNOWN_TOPICROUTING_TRANSFORMS + + self.KNOWN_NONTOPICROUTING_TRANSFORMS + for transform in transforms + ] + ) + ALL_TRANSFORMS_NON_TOPICROUTING = all( + [ + transform["type"] in self.KNOWN_NONTOPICROUTING_TRANSFORMS + for transform in transforms + ] + ) + + if NO_TRANSFORM or ALL_TRANSFORMS_NON_TOPICROUTING: + return self.default_get_lineages( + database_name=database_name, + source_platform=source_platform, + topic_prefix=topic_prefix, + ) + + if SINGLE_TRANSFORM and transforms[0]["type"] == self.REGEXROUTER: + tables = self.get_table_names() + topic_names = list(self.connector_manifest.topic_names) + + from java.util.regex import Pattern + + for table in tables: + source_table: str = table[-1] + topic = topic_prefix + source_table if topic_prefix else source_table + + transform_regex = 
Pattern.compile(transforms[0]["regex"]) + transform_replacement = transforms[0]["replacement"] + + matcher = transform_regex.matcher(topic) + if matcher.matches(): + topic = str(matcher.replaceFirst(transform_replacement)) + + # Additional check to confirm that the topic present + # in connector topics + + if topic in self.connector_manifest.topic_names: + # include schema name for three-level hierarchies + if has_three_level_hierarchy(source_platform) and len(table) > 1: + source_table = f"{table[-2]}.{table[-1]}" + + dataset_name = get_dataset_name(database_name, source_table) + + lineage = KafkaConnectLineage( + source_dataset=dataset_name, + source_platform=source_platform, + target_dataset=topic, + target_platform=KAFKA, + ) + topic_names.remove(topic) + lineages.append(lineage) + + if topic_names: + lineages.extend( + self.default_get_lineages( + database_name=database_name, + source_platform=source_platform, + topic_prefix=topic_prefix, + topic_names=topic_names, + include_source_dataset=False, + ) + ) + self.report.warning( + "Could not find input dataset for connector topics", + f"{self.connector_manifest.name} : {topic_names}", + ) + return lineages + else: + include_source_dataset = True + if SINGLE_TRANSFORM and UNKNOWN_TRANSFORM: + self.report.warning( + "Could not find input dataset, connector has unknown transform", + f"{self.connector_manifest.name} : {transforms[0]['type']}", + ) + include_source_dataset = False + if not SINGLE_TRANSFORM and UNKNOWN_TRANSFORM: + self.report.warning( + "Could not find input dataset, connector has one or more unknown transforms", + self.connector_manifest.name, + ) + include_source_dataset = False + lineages = self.default_get_lineages( + database_name=database_name, + source_platform=source_platform, + topic_prefix=topic_prefix, + include_source_dataset=include_source_dataset, + ) + return lineages + + +@dataclass +class MongoSourceConnector(BaseConnector): + # https://www.mongodb.com/docs/kafka-connector/current/source-connector/ + + @dataclass + class MongoSourceParser: + db_connection_url: Optional[str] + source_platform: str + database_name: Optional[str] + topic_prefix: Optional[str] + transforms: List[str] + + def get_parser( + self, + connector_manifest: ConnectorManifest, + ) -> MongoSourceParser: + parser = self.MongoSourceParser( + db_connection_url=connector_manifest.config.get("connection.uri"), + source_platform="mongodb", + database_name=connector_manifest.config.get("database"), + topic_prefix=connector_manifest.config.get("topic_prefix"), + transforms=( + connector_manifest.config["transforms"].split(",") + if "transforms" in connector_manifest.config + else [] + ), + ) + + return parser + + def extract_lineages(self) -> List[KafkaConnectLineage]: + lineages: List[KafkaConnectLineage] = list() + parser = self.get_parser(self.connector_manifest) + source_platform = parser.source_platform + topic_naming_pattern = r"mongodb\.(\w+)\.(\w+)" + + if not self.connector_manifest.topic_names: + return lineages + + for topic in self.connector_manifest.topic_names: + found = re.search(re.compile(topic_naming_pattern), topic) + + if found: + table_name = get_dataset_name(found.group(1), found.group(2)) + + lineage = KafkaConnectLineage( + source_dataset=table_name, + source_platform=source_platform, + target_dataset=topic, + target_platform=KAFKA, + ) + lineages.append(lineage) + return lineages + + +@dataclass +class DebeziumSourceConnector(BaseConnector): + @dataclass + class DebeziumParser: + source_platform: str + server_name: 
Optional[str] + database_name: Optional[str] + + def get_server_name(self, connector_manifest: ConnectorManifest) -> str: + if "topic.prefix" in connector_manifest.config: + return connector_manifest.config["topic.prefix"] + else: + return connector_manifest.config.get("database.server.name", "") + + def get_parser( + self, + connector_manifest: ConnectorManifest, + ) -> DebeziumParser: + connector_class = connector_manifest.config.get(CONNECTOR_CLASS, "") + + if connector_class == "io.debezium.connector.mysql.MySqlConnector": + parser = self.DebeziumParser( + source_platform="mysql", + server_name=self.get_server_name(connector_manifest), + database_name=None, + ) + elif connector_class == "MySqlConnector": + parser = self.DebeziumParser( + source_platform="mysql", + server_name=self.get_server_name(connector_manifest), + database_name=None, + ) + elif connector_class == "io.debezium.connector.mongodb.MongoDbConnector": + parser = self.DebeziumParser( + source_platform="mongodb", + server_name=self.get_server_name(connector_manifest), + database_name=None, + ) + elif connector_class == "io.debezium.connector.postgresql.PostgresConnector": + parser = self.DebeziumParser( + source_platform="postgres", + server_name=self.get_server_name(connector_manifest), + database_name=connector_manifest.config.get("database.dbname"), + ) + elif connector_class == "io.debezium.connector.oracle.OracleConnector": + parser = self.DebeziumParser( + source_platform="oracle", + server_name=self.get_server_name(connector_manifest), + database_name=connector_manifest.config.get("database.dbname"), + ) + elif connector_class == "io.debezium.connector.sqlserver.SqlServerConnector": + database_name = connector_manifest.config.get( + "database.names" + ) or connector_manifest.config.get("database.dbname") + + if "," in str(database_name): + raise Exception( + f"Only one database is supported for Debezium's SQL Server connector. 
Found: {database_name}" + ) + + parser = self.DebeziumParser( + source_platform="mssql", + server_name=self.get_server_name(connector_manifest), + database_name=database_name, + ) + elif connector_class == "io.debezium.connector.db2.Db2Connector": + parser = self.DebeziumParser( + source_platform="db2", + server_name=self.get_server_name(connector_manifest), + database_name=connector_manifest.config.get("database.dbname"), + ) + elif connector_class == "io.debezium.connector.vitess.VitessConnector": + parser = self.DebeziumParser( + source_platform="vitess", + server_name=self.get_server_name(connector_manifest), + database_name=connector_manifest.config.get("vitess.keyspace"), + ) + else: + raise ValueError(f"Connector class '{connector_class}' is unknown.") + + return parser + + def extract_lineages(self) -> List[KafkaConnectLineage]: + lineages: List[KafkaConnectLineage] = list() + + try: + parser = self.get_parser(self.connector_manifest) + source_platform = parser.source_platform + server_name = parser.server_name + database_name = parser.database_name + topic_naming_pattern = rf"({server_name})\.(\w+\.\w+)" + + if not self.connector_manifest.topic_names: + return lineages + + for topic in self.connector_manifest.topic_names: + found = re.search(re.compile(topic_naming_pattern), topic) + + if found: + table_name = get_dataset_name(database_name, found.group(2)) + + lineage = KafkaConnectLineage( + source_dataset=table_name, + source_platform=source_platform, + target_dataset=topic, + target_platform=KAFKA, + ) + lineages.append(lineage) + return lineages + except Exception as e: + self.report.warning( + "Error resolving lineage for connector", + self.connector_manifest.name, + exc=e, + ) + + return [] + + +@dataclass +class ConfigDrivenSourceConnector(BaseConnector): + def extract_lineages(self) -> List[KafkaConnectLineage]: + lineages = [] + for connector in self.config.generic_connectors: + if connector.connector_name == self.connector_manifest.name: + target_connector = connector + break + for topic in self.connector_manifest.topic_names: + lineage = KafkaConnectLineage( + source_dataset=target_connector.source_dataset, + source_platform=target_connector.source_platform, + target_dataset=topic, + target_platform=KAFKA, + ) + lineages.append(lineage) + return lineages + + +JDBC_SOURCE_CONNECTOR_CLASS = "io.confluent.connect.jdbc.JdbcSourceConnector" +DEBEZIUM_SOURCE_CONNECTOR_PREFIX = "io.debezium.connector" +MONGO_SOURCE_CONNECTOR_CLASS = "com.mongodb.kafka.connect.MongoSourceConnector" From 2e544614f12bf2ad8e758b2fd742ee14c6998825 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware <159135491+sagar-salvi-apptware@users.noreply.github.com> Date: Thu, 19 Dec 2024 12:41:40 +0530 Subject: [PATCH 8/8] feat(ingest): add looker meta extractor support in sql parsing (#12062) Co-authored-by: Mayuri N Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> --- .../datahub/configuration/source_common.py | 13 ++ .../ingestion/source/looker/looker_common.py | 56 +++++- .../source/looker/looker_lib_wrapper.py | 14 +- .../ingestion/source/looker/looker_source.py | 13 +- .../ingestion/source/powerbi/config.py | 15 +- .../powerbi/dataplatform_instance_resolver.py | 2 +- .../source/powerbi/m_query/pattern_handler.py | 2 +- .../source/snowflake/snowflake_v2.py | 1 + .../sql_parsing/sql_parsing_aggregator.py | 2 +- .../sql_parsing/tool_meta_extractor.py | 121 ++++++++++++- .../looker/golden_looker_mces.json | 56 ++++++ .../looker/golden_test_allow_ingest.json | 53 ++++++ 
...olden_test_external_project_view_mces.json | 53 ++++++ .../looker/golden_test_file_path_ingest.json | 53 ++++++ ...olden_test_folder_path_pattern_ingest.json | 53 ++++++ .../golden_test_independent_look_ingest.json | 170 +++++++++++++----- .../looker/golden_test_ingest.json | 54 ++++++ .../looker/golden_test_ingest_joins.json | 53 ++++++ .../golden_test_ingest_unaliased_joins.json | 53 ++++++ ...en_test_non_personal_independent_look.json | 71 ++++++++ .../looker_mces_golden_deleted_stateful.json | 68 ++++++- .../looker/looker_mces_usage_history.json | 53 ++++++ .../tests/integration/looker/test_looker.py | 20 +++ .../sql_parsing/test_tool_meta_extractor.py | 44 ++++- .../state/test_redundant_run_skip_handler.py | 6 +- .../platformresource/PlatformResourceType.pdl | 6 +- 26 files changed, 1026 insertions(+), 79 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index 44c737f1bd13d..8e41e9fb91787 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -63,3 +63,16 @@ class DatasetLineageProviderConfigBase(EnvConfigMixin): default=None, description="A holder for platform -> platform_instance mappings to generate correct dataset urns", ) + + +class PlatformDetail(ConfigModel): + platform_instance: Optional[str] = Field( + default=None, + description="DataHub platform instance name. To generate correct urn for upstream dataset, this should match " + "with platform instance name used in ingestion " + "recipe of other datahub sources.", + ) + env: str = Field( + default=DEFAULT_ENV, + description="The environment that all assets produced by DataHub platform ingestion source belong to", + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 57a251ef2ed14..a66962f962255 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -31,6 +31,10 @@ from pydantic.class_validators import validator import datahub.emitter.mce_builder as builder +from datahub.api.entities.platformresource.platform_resource import ( + PlatformResource, + PlatformResourceKey, +) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ContainerKey, create_embed_mcp from datahub.ingestion.api.report import Report @@ -106,7 +110,7 @@ from datahub.utilities.url_util import remove_port_from_url CORPUSER_DATAHUB = "urn:li:corpuser:datahub" - +LOOKER = "looker" logger = logging.getLogger(__name__) @@ -1411,6 +1415,7 @@ class LookerDashboardSourceReport(StaleEntityRemovalSourceReport): resolved_user_ids: int = 0 email_ids_missing: int = 0 # resolved users with missing email addresses + looker_user_count: int = 0 _looker_api: Optional[LookerAPI] = None query_latency: Dict[str, datetime.timedelta] = dataclasses_field( @@ -1614,9 +1619,21 @@ def get_urn_dashboard_id(self): class LookerUserRegistry: looker_api_wrapper: LookerAPI fields: str = ",".join(["id", "email", "display_name", "first_name", "last_name"]) + _user_cache: Dict[str, LookerUser] = {} - def __init__(self, looker_api: LookerAPI): + def __init__(self, looker_api: LookerAPI, report: LookerDashboardSourceReport): self.looker_api_wrapper = looker_api + self.report = report + self._initialize_user_cache() + + def 
_initialize_user_cache(self) -> None: + raw_users: Sequence[User] = self.looker_api_wrapper.all_users( + user_fields=self.fields + ) + + for raw_user in raw_users: + looker_user = LookerUser.create_looker_user(raw_user) + self._user_cache[str(looker_user.id)] = looker_user def get_by_id(self, id_: str) -> Optional[LookerUser]: if not id_: @@ -1624,6 +1641,9 @@ def get_by_id(self, id_: str) -> Optional[LookerUser]: logger.debug(f"Will get user {id_}") + if str(id_) in self._user_cache: + return self._user_cache.get(str(id_)) + raw_user: Optional[User] = self.looker_api_wrapper.get_user( str(id_), user_fields=self.fields ) @@ -1632,3 +1652,35 @@ def get_by_id(self, id_: str) -> Optional[LookerUser]: looker_user = LookerUser.create_looker_user(raw_user) return looker_user + + def to_platform_resource( + self, platform_instance: Optional[str] + ) -> Iterable[MetadataChangeProposalWrapper]: + try: + platform_resource_key = PlatformResourceKey( + platform=LOOKER, + resource_type="USER_ID_MAPPING", + platform_instance=platform_instance, + primary_key="", + ) + + # Extract user email mappings + user_email_cache = { + user_id: user.email + for user_id, user in self._user_cache.items() + if user.email + } + + platform_resource = PlatformResource.create( + key=platform_resource_key, + value=user_email_cache, + ) + + self.report.looker_user_count = len(user_email_cache) + yield from platform_resource.to_mcps() + + except Exception as exc: + self.report.warning( + message="Failed to generate platform resource for looker id mappings", + exc=exc, + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py index ab55d4e15e5de..c3f2a110136c4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py @@ -68,6 +68,7 @@ class LookerAPIStats(BaseModel): get_look_calls: int = 0 search_looks_calls: int = 0 search_dashboards_calls: int = 0 + all_user_calls: int = 0 class LookerAPI: @@ -135,7 +136,7 @@ def get_available_permissions(self) -> Set[str]: return permissions - @lru_cache(maxsize=1000) + @lru_cache(maxsize=5000) def get_user(self, id_: str, user_fields: str) -> Optional[User]: self.client_stats.user_calls += 1 try: @@ -154,6 +155,17 @@ def get_user(self, id_: str, user_fields: str) -> Optional[User]: # User not found return None + def all_users(self, user_fields: str) -> Sequence[User]: + self.client_stats.all_user_calls += 1 + try: + return self.client.all_users( + fields=cast(str, user_fields), + transport_options=self.transport_options, + ) + except SDKError as e: + logger.warning(f"Failure was {e}") + return [] + def execute_query(self, write_query: WriteQuery) -> List[Dict]: logger.debug(f"Executing query {write_query}") self.client_stats.query_calls += 1 diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index cd8ccb8217257..815c5dfb1c014 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -145,7 +145,9 @@ def __init__(self, config: LookerDashboardSourceConfig, ctx: PipelineContext): self.source_config: LookerDashboardSourceConfig = config self.reporter: LookerDashboardSourceReport = LookerDashboardSourceReport() self.looker_api: LookerAPI = 
LookerAPI(self.source_config) - self.user_registry: LookerUserRegistry = LookerUserRegistry(self.looker_api) + self.user_registry: LookerUserRegistry = LookerUserRegistry( + self.looker_api, self.reporter + ) self.explore_registry: LookerExploreRegistry = LookerExploreRegistry( self.looker_api, self.reporter, self.source_config ) @@ -1673,5 +1675,14 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: yield usage_mcp.as_workunit() self.reporter.report_stage_end("usage_extraction") + # Dump looker user resource mappings. + logger.info("Ingesting looker user resource mapping workunits") + self.reporter.report_stage_start("user_resource_extraction") + yield from auto_workunit( + self.user_registry.to_platform_resource( + self.source_config.platform_instance + ) + ) + def get_report(self) -> SourceReport: return self.reporter diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index f7458c4eb4d5b..b49d40a0c7eb6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -9,7 +9,7 @@ import datahub.emitter.mce_builder as builder from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.source_common import DatasetSourceConfigMixin, PlatformDetail from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.common.subtypes import BIAssetSubTypes from datahub.ingestion.source.state.stale_entity_removal_handler import ( @@ -232,19 +232,6 @@ def default_for_dataset_type_mapping() -> Dict[str, str]: return dict_ -class PlatformDetail(ConfigModel): - platform_instance: Optional[str] = pydantic.Field( - default=None, - description="DataHub platform instance name. 
To generate correct urn for upstream dataset, this should match " - "with platform instance name used in ingestion " - "recipe of other datahub sources.", - ) - env: str = pydantic.Field( - default=builder.DEFAULT_ENV, - description="The environment that all assets produced by DataHub platform ingestion source belong to", - ) - - class DataBricksPlatformDetail(PlatformDetail): """ metastore is an additional field used in Databricks connector to generate the dataset urn diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py index baaa8d5b85ae1..6d51e853a2fb0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py @@ -2,8 +2,8 @@ from abc import ABC, abstractmethod from typing import Union +from datahub.configuration.source_common import PlatformDetail from datahub.ingestion.source.powerbi.config import ( - PlatformDetail, PowerBiDashboardSourceConfig, PowerBIPlatformDetail, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py index ffaed79f4e42a..63520bd731de8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py @@ -5,13 +5,13 @@ from lark import Tree +from datahub.configuration.source_common import PlatformDetail from datahub.emitter import mce_builder as builder from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.powerbi.config import ( Constant, DataBricksPlatformDetail, DataPlatformPair, - PlatformDetail, PowerBiDashboardSourceConfig, PowerBiDashboardSourceReport, PowerBIPlatformDetail, diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index c3a7912c40e8e..e5883dd0349a3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -540,6 +540,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: identifiers=self.identifiers, schema_resolver=schema_resolver, discovered_tables=discovered_datasets, + graph=self.ctx.graph, ) # TODO: This is slightly suboptimal because we create two SqlParsingAggregator instances with different configs diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index 79ea98d1c7f54..f81eb291e89e1 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -490,7 +490,7 @@ def __init__( self._exit_stack.push(self._query_usage_counts) # Tool Extractor - self._tool_meta_extractor = ToolMetaExtractor() + self._tool_meta_extractor = ToolMetaExtractor.create(graph) self.report.tool_meta_report = self._tool_meta_extractor.report def close(self) -> None: diff --git a/metadata-ingestion/src/datahub/sql_parsing/tool_meta_extractor.py b/metadata-ingestion/src/datahub/sql_parsing/tool_meta_extractor.py index 0d85002776e5e..5af9d9d4f0fff 100644 --- 
a/metadata-ingestion/src/datahub/sql_parsing/tool_meta_extractor.py +++ b/metadata-ingestion/src/datahub/sql_parsing/tool_meta_extractor.py @@ -1,3 +1,4 @@ +import contextlib import json import logging from dataclasses import dataclass, field @@ -5,8 +6,15 @@ from typing_extensions import Protocol +from datahub.api.entities.platformresource.platform_resource import ( + ElasticPlatformResourceQuery, + PlatformResource, + PlatformResourceSearchFields, +) from datahub.ingestion.api.report import Report +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn +from datahub.utilities.search_utils import LogicalOperator from datahub.utilities.stats_collections import int_top_k_dict UrnStr = str @@ -31,6 +39,7 @@ def _get_last_line(query: str) -> str: @dataclass class ToolMetaExtractorReport(Report): num_queries_meta_extracted: Dict[str, int] = field(default_factory=int_top_k_dict) + failures: List[str] = field(default_factory=list) class ToolMetaExtractor: @@ -42,14 +51,81 @@ class ToolMetaExtractor: by warehouse query logs. """ - def __init__(self) -> None: - self.report = ToolMetaExtractorReport() + def __init__( + self, + report: ToolMetaExtractorReport, + looker_user_mapping: Optional[Dict[str, str]] = None, + ) -> None: + self.report = report self.known_tool_extractors: List[Tuple[str, Callable[[QueryLog], bool]]] = [ ( "mode", self._extract_mode_query, - ) + ), + ( + "looker", + self._extract_looker_query, + ), ] + # maps user id (as string) to email address + self.looker_user_mapping = looker_user_mapping + + @classmethod + def create( + cls, + graph: Optional[DataHubGraph] = None, + ) -> "ToolMetaExtractor": + report = ToolMetaExtractorReport() + looker_user_mapping = None + if graph: + try: + looker_user_mapping = cls.extract_looker_user_mapping_from_graph( + graph, report + ) + except Exception as e: + report.failures.append( + f"Unexpected error during Looker user metadata extraction: {str(e)}" + ) + + return cls(report, looker_user_mapping) + + @classmethod + def extract_looker_user_mapping_from_graph( + cls, graph: DataHubGraph, report: ToolMetaExtractorReport + ) -> Optional[Dict[str, str]]: + looker_user_mapping = None + query = ( + ElasticPlatformResourceQuery.create_from() + .group(LogicalOperator.AND) + .add_field_match(PlatformResourceSearchFields.PLATFORM, "looker") + .add_field_match( + PlatformResourceSearchFields.RESOURCE_TYPE, + "USER_ID_MAPPING", + ) + .end() + ) + platform_resources = list( + PlatformResource.search_by_filters(query=query, graph_client=graph) + ) + + if len(platform_resources) > 1: + report.failures.append( + "Looker user metadata extraction failed. Found more than one looker user id mappings." + ) + else: + platform_resource = platform_resources[0] + + if ( + platform_resource + and platform_resource.resource_info + and platform_resource.resource_info.value + ): + with contextlib.suppress(ValueError, AssertionError): + value = platform_resource.resource_info.value.as_raw_json() + if value: + looker_user_mapping = value + + return looker_user_mapping def _extract_mode_query(self, entry: QueryLog) -> bool: """ @@ -78,14 +154,49 @@ def _extract_mode_query(self, entry: QueryLog) -> bool: return True + def _extract_looker_query(self, entry: QueryLog) -> bool: + """ + Returns: + bool: whether QueryLog entry is that of looker and looker user info + is extracted into entry. 
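+
+        Illustrative example of the trailing comment this parser handles
+        (assumed shape; only the "user_id" key is required):
+            -- Looker Query Context '{"user_id": 12}'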
+ """ + if not self.looker_user_mapping: + return False + + last_line = _get_last_line(entry.query_text) + + if not (last_line.startswith("--") and "Looker Query Context" in last_line): + return False + + start_quote_idx = last_line.index("'") + end_quote_idx = last_line.rindex("'") + if start_quote_idx == -1 or end_quote_idx == -1: + return False + + looker_json_raw = last_line[start_quote_idx + 1 : end_quote_idx] + looker_json = json.loads(looker_json_raw) + + user_id = str(looker_json["user_id"]) + email = self.looker_user_mapping.get(user_id) + if not email: + return False + + original_user = entry.user + + entry.user = email_to_user_urn(email) + entry.extra_info = entry.extra_info or {} + entry.extra_info["user_via"] = original_user + + return True + def extract_bi_metadata(self, entry: QueryLog) -> bool: for tool, meta_extractor in self.known_tool_extractors: try: if meta_extractor(entry): self.report.num_queries_meta_extracted[tool] += 1 return True - except Exception: - logger.debug("Tool metadata extraction failed with error : {e}") + except Exception as e: + logger.debug(f"Tool metadata extraction failed with error : {e}") return False diff --git a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json index a9c445b5986ef..6ae772c134cb3 100644 --- a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json @@ -842,6 +842,62 @@ "pipelineName": "stateful-looker-pipeline" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.10)", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json index af9c62a2a4180..d7620980a9ced 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json @@ -497,6 +497,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": 
"urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json index b89bc356b48fd..13963af55bfe5 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json @@ -735,6 +735,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json index 810fefd8f6cb8..f11d060102851 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json @@ -735,6 +735,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": 
"platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json index 3d78397f54a23..f6e39dd5286cd 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json @@ -828,6 +828,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json index 5a540e61e768d..203bed843155c 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json @@ -464,6 +464,21 @@ "/Folders/Shared" ] } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": 
"urn:li:corpuser:test-1@looker.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } } ] } @@ -708,6 +723,21 @@ "/Folders/Personal" ] } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:test-2@looker.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } } ] } @@ -1108,12 +1138,12 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/Explore/sales_model" + "/Explore/data" ] } }, @@ -1126,12 +1156,12 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "project": "lkml_samples", - "model": "sales_model", + "model": "data", "looker.explore.label": "My Explore View", - "looker.explore.name": "sales_explore", + "looker.explore.name": "my_view", "looker.explore.file": "test_source_file.lkml" }, - "externalUrl": "https://looker.company.com/explore/sales_model/sales_explore", + "externalUrl": "https://looker.company.com/explore/data/my_view", "name": "My Explore View", "description": "lorem ipsum", "tags": [] @@ -1153,7 +1183,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "sales_explore", + "schemaName": "my_view", "platform": "urn:li:dataPlatform:looker", "version": 0, "created": { @@ -1208,7 +1238,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1227,12 +1257,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { "json": { - "renderUrl": "https://looker.company.com/embed/explore/sales_model/sales_explore" + "renderUrl": "https://looker.company.com/embed/explore/data/my_view" } }, "systemMetadata": { @@ -1244,12 +1274,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" + "container": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" } }, "systemMetadata": { @@ -1261,7 +1291,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1271,8 +1301,8 @@ "id": "Explore" }, { - "id": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", - "urn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" + "id": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "urn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" } ] } @@ -1287,12 
+1317,12 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/Explore/data" + "/Explore/order_model" ] } }, @@ -1305,12 +1335,12 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "project": "lkml_samples", - "model": "data", + "model": "order_model", "looker.explore.label": "My Explore View", - "looker.explore.name": "my_view", + "looker.explore.name": "order_explore", "looker.explore.file": "test_source_file.lkml" }, - "externalUrl": "https://looker.company.com/explore/data/my_view", + "externalUrl": "https://looker.company.com/explore/order_model/order_explore", "name": "My Explore View", "description": "lorem ipsum", "tags": [] @@ -1332,7 +1362,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "my_view", + "schemaName": "order_explore", "platform": "urn:li:dataPlatform:looker", "version": 0, "created": { @@ -1387,7 +1417,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1406,12 +1436,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { "json": { - "renderUrl": "https://looker.company.com/embed/explore/data/my_view" + "renderUrl": "https://looker.company.com/embed/explore/order_model/order_explore" } }, "systemMetadata": { @@ -1423,12 +1453,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + "container": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" } }, "systemMetadata": { @@ -1440,7 +1470,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1450,8 +1480,8 @@ "id": "Explore" }, { - "id": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", - "urn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + "id": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60", + "urn": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" } ] } @@ -1466,12 +1496,12 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/Explore/order_model" + "/Explore/sales_model" ] } }, @@ -1484,12 +1514,12 @@ 
"com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "project": "lkml_samples", - "model": "order_model", + "model": "sales_model", "looker.explore.label": "My Explore View", - "looker.explore.name": "order_explore", + "looker.explore.name": "sales_explore", "looker.explore.file": "test_source_file.lkml" }, - "externalUrl": "https://looker.company.com/explore/order_model/order_explore", + "externalUrl": "https://looker.company.com/explore/sales_model/sales_explore", "name": "My Explore View", "description": "lorem ipsum", "tags": [] @@ -1511,7 +1541,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "order_explore", + "schemaName": "sales_explore", "platform": "urn:li:dataPlatform:looker", "version": 0, "created": { @@ -1566,7 +1596,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1585,12 +1615,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { "json": { - "renderUrl": "https://looker.company.com/embed/explore/order_model/order_explore" + "renderUrl": "https://looker.company.com/embed/explore/sales_model/sales_explore" } }, "systemMetadata": { @@ -1602,12 +1632,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" + "container": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" } }, "systemMetadata": { @@ -1619,7 +1649,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1629,8 +1659,8 @@ "id": "Explore" }, { - "id": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60", - "urn": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" + "id": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", + "urn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" } ] } @@ -1705,6 +1735,62 @@ "pipelineName": "execution-1" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { 
+ "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index 9ac95b8482a47..87af50f95ed6b 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -793,6 +793,60 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:8436a2a37c4a7e81fb08c9c8415d2e4b", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:8436a2a37c4a7e81fb08c9c8415d2e4b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:8436a2a37c4a7e81fb08c9c8415d2e4b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json index 3a2c6359ea63c..b990ce7c67dab 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json @@ -759,6 +759,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 
1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json index 007eee348aeaf..391192b3d16f3 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json @@ -513,6 +513,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json b/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json index 859b9163d7aad..4909a6af73a22 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json @@ -464,6 +464,21 @@ "/Folders/Shared" ] } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:test-1@looker.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } } ] } @@ -1185,6 +1200,62 @@ "pipelineName": "execution-1" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + 
"pipelineName": "execution-1" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json index 8256c984afb27..ddeb5428b1d72 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json @@ -762,6 +762,62 @@ "pipelineName": "stateful-looker-pipeline" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", @@ -814,8 +870,8 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,dashboards.11)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,bogus data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -831,8 +887,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(looker,dashboard_elements.10)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.11)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -865,8 +921,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,bogus data.explore.my_view,PROD)", + 
"entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.10)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json index 0b3530f9c2462..594983c8fb0f2 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json @@ -678,6 +678,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index 8bbf14709ff9f..a39de8384efb2 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -83,6 +83,7 @@ def test_looker_ingest(pytestconfig, tmp_path, mock_time): with mock.patch("looker_sdk.init40") as mock_sdk: mock_sdk.return_value = mocked_client setup_mock_dashboard(mocked_client) + mocked_client.run_inline_query.side_effect = side_effect_query_inline setup_mock_explore(mocked_client) test_resources_dir = pytestconfig.rootpath / "tests/integration/looker" @@ -319,6 +320,7 @@ def setup_mock_look(mocked_client): mocked_client.all_looks.return_value = [ Look( id="1", + user_id="1", title="Outer Look", description="I am not part of any Dashboard", query_id="1", @@ -327,6 +329,7 @@ def setup_mock_look(mocked_client): Look( id="2", title="Personal Look", + user_id="2", description="I am not part of any Dashboard and in personal folder", query_id="2", folder=FolderBase( @@ -561,6 +564,20 @@ def get_user( mocked_client.user.side_effect = get_user +def setup_mock_all_user(mocked_client): + def all_users( + fields: Optional[str] = None, + transport_options: Optional[transport.TransportOptions] = None, + ) -> List[User]: + return [ + User(id="1", email="test-1@looker.com"), + User(id="2", email="test-2@looker.com"), + User(id="3", email="test-3@looker.com"), + ] + + mocked_client.all_users.side_effect = all_users + + def side_effect_query_inline( result_format: str, body: WriteQuery, 
transport_options: Optional[TransportOptions] ) -> str: @@ -714,6 +731,7 @@ def test_looker_ingest_usage_history(pytestconfig, tmp_path, mock_time): mocked_client.run_inline_query.side_effect = side_effect_query_inline setup_mock_explore(mocked_client) setup_mock_user(mocked_client) + setup_mock_all_user(mocked_client) test_resources_dir = pytestconfig.rootpath / "tests/integration/looker" @@ -946,6 +964,8 @@ def ingest_independent_looks( mock_sdk.return_value = mocked_client setup_mock_dashboard(mocked_client) setup_mock_explore(mocked_client) + setup_mock_user(mocked_client) + setup_mock_all_user(mocked_client) setup_mock_look(mocked_client) test_resources_dir = pytestconfig.rootpath / "tests/integration/looker" diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_tool_meta_extractor.py b/metadata-ingestion/tests/unit/sql_parsing/test_tool_meta_extractor.py index 6f590b5307146..f6566f007f5e6 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_tool_meta_extractor.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_tool_meta_extractor.py @@ -1,11 +1,14 @@ from datahub.configuration.datetimes import parse_absolute_time from datahub.metadata.urns import CorpUserUrn from datahub.sql_parsing.sql_parsing_aggregator import PreparsedQuery -from datahub.sql_parsing.tool_meta_extractor import ToolMetaExtractor +from datahub.sql_parsing.tool_meta_extractor import ( + ToolMetaExtractor, + ToolMetaExtractorReport, +) def test_extract_mode_metadata() -> None: - extractor = ToolMetaExtractor() + extractor = ToolMetaExtractor(report=ToolMetaExtractorReport()) query = """\ select * from LONG_TAIL_COMPANIONS.ADOPTION.PET_PROFILES LIMIT 100 @@ -30,8 +33,42 @@ def test_extract_mode_metadata() -> None: assert extractor.report.num_queries_meta_extracted["mode"] == 1 +def test_extract_looker_metadata() -> None: + extractor = ToolMetaExtractor( + report=ToolMetaExtractorReport(), looker_user_mapping={"7": "john.doe@xyz.com"} + ) + looker_query = """\ +SELECT + all_entities_extended_sibling."ENTITY" AS "all_entities_extended_sibling.entity_type", + COUNT(DISTINCT ( all_entities_extended_sibling."URN" )) AS "all_entities_extended_sibling.distinct_count" +FROM "PUBLIC"."ALL_ENTITIES" + AS all_entities_extended_sibling +GROUP BY + 1 +ORDER BY + 1 +FETCH NEXT 50 ROWS ONLY +-- Looker Query Context '{"user_id":7,"history_slug":"264797031bc403cf382cbefbe3700849","instance_slug":"32654f2ffadf10b1949d4009e52fc6a4"}' +""" + + entry = PreparsedQuery( + query_id=None, + query_text=looker_query, + upstreams=[], + downstream=None, + column_lineage=None, + column_usage=None, + inferred_schema=None, + user=CorpUserUrn("mode"), + timestamp=parse_absolute_time("2021-08-01T01:02:03Z"), + ) + assert extractor.extract_bi_metadata(entry) + assert entry.user == CorpUserUrn("john.doe") + assert extractor.report.num_queries_meta_extracted["looker"] == 1 + + def test_extract_no_metadata() -> None: - extractor = ToolMetaExtractor() + extractor = ToolMetaExtractor(report=ToolMetaExtractorReport()) query = """\ select * from LONG_TAIL_COMPANIONS.ADOPTION.PET_PROFILES LIMIT 100 @@ -53,3 +90,4 @@ def test_extract_no_metadata() -> None: assert not extractor.extract_bi_metadata(entry) assert extractor.report.num_queries_meta_extracted["mode"] == 0 + assert extractor.report.num_queries_meta_extracted["looker"] == 0 diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py index 
85c86f8d205d9..5631ad2c69f94 100644 --- a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py @@ -37,7 +37,11 @@ def stateful_source(mock_datahub_graph: DataHubGraph) -> Iterable[SnowflakeV2Sou ), ) - with mock.patch("snowflake.connector.connect"): + with mock.patch( + "datahub.sql_parsing.sql_parsing_aggregator.ToolMetaExtractor.create", + ) as mock_checkpoint, mock.patch("snowflake.connector.connect"): + mock_checkpoint.return_value = mock.MagicMock() + yield SnowflakeV2Source(ctx=ctx, config=config) diff --git a/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl b/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl index 2f36eda9141ab..1a1dbea4359fb 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl @@ -9,9 +9,13 @@ enum PlatformResourceType { /** * e.g. a Slack member resource, Looker user resource, etc. */ - USER_INFO, + USER_INFO, /** * e.g. a Slack channel */ CONVERSATION + /** + * e.g. Looker mapping of all user ids + */ + USER_ID_MAPPING }
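
For context on the test changes above: the new Looker path in the tool-metadata extraction keys off the trailing "-- Looker Query Context '{...}'" SQL comment and the ingestion-time user-id-to-email mapping that the new USER_ID_MAPPING platform resource persists. The sketch below is only an illustration of that resolution step under those assumptions; the function name, regex, and email-to-corpuser rule here are hypothetical and are not the actual ToolMetaExtractor internals.

    import json
    import re
    from typing import Dict, Optional

    # Illustrative only: parse the Looker query-context comment and map the
    # embedded user_id to a corpuser name via the ingestion-time mapping.
    LOOKER_CONTEXT_RE = re.compile(r"-- Looker Query Context '(?P<ctx>\{.*\})'")

    def resolve_looker_user(
        query_text: str, looker_user_mapping: Dict[str, str]
    ) -> Optional[str]:
        """Return a corpuser name for the Looker user_id in the query, if resolvable."""
        match = LOOKER_CONTEXT_RE.search(query_text)
        if not match:
            return None
        context = json.loads(match.group("ctx"))
        email = looker_user_mapping.get(str(context.get("user_id")))
        if email is None:
            return None
        # The unit test above expects the local part of the email as the corpuser id.
        return email.split("@")[0]

    if __name__ == "__main__":
        mapping = {"7": "john.doe@xyz.com"}
        query = "SELECT 1\n-- Looker Query Context '{\"user_id\":7,\"history_slug\":\"abc\"}'\n"
        assert resolve_looker_user(query, mapping) == "john.doe"

This mirrors the assertion added in test_extract_looker_metadata (user_id 7 resolving to john.doe) without claiming to reproduce the production implementation.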