From 79e4bf2a4c1d3c3f9b0606fd04fe8bd5d1feb57b Mon Sep 17 00:00:00 2001 From: Michael Stead Date: Thu, 25 Nov 2021 15:53:00 -0400 Subject: [PATCH] Consider Event timestamp offset when tallying hourly An hourly metric value returned from prometheus for a query at 12pm - 12:59:59.999, actually represents the metric value calculation from 11am -12pm and therefore we set the event record's start timestamp at 11am. Before this change, our jobs run NOW - 5h for metrics gathering and NOW - 6h for tallying the metrics. Because of this, and the 1h event record timestamp offset, the tally process is querying for metric events that are CURRENTLY being gathered! Lets say that it is 12pm NOW and the jobs run. Metrics will be gathered for 7am - 7:59:59.999 and events will be created and stored with a start timestamp of 6am. The tally runs at the same time and will query for events at 6am, meaning each tally is actually querying for metrics that may or may not exist! This tally should occur at 5am. This patch changes the offset of the tally run by one hour meaning, tally will occur NOW - 7h and metrics gathering will remain at NOW - 5h (remember that metric events will actually be persisted as NOW - 6h). NOTE: I removed the HOURLY_TALLY_OFFSET env var from the metrics gathering job template as it isn't being used when the job runs. --- src/main/resources/application.yaml | 2 +- .../job/CaptureSnapshotsTaskManagerTest.java | 26 +++++++++---------- templates/rhsm-subscriptions-scheduler.yml | 6 ++--- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/main/resources/application.yaml b/src/main/resources/application.yaml index 7a97f79435..f7dc0f4577 100644 --- a/src/main/resources/application.yaml +++ b/src/main/resources/application.yaml @@ -71,7 +71,7 @@ rhsm-subscriptions: seek-override-end: ${KAFKA_SEEK_OVERRIDE_END:false} seek-override-timestamp: ${KAFKA_SEEK_OVERRIDE_TIMESTAMP:} prometheus-latency-duration: ${PROMETHEUS_LATENCY_DURATION:4h} - hourly-tally-offset: ${HOURLY_TALLY_OFFSET:60m} + hourly-tally-offset: ${HOURLY_TALLY_OFFSET:120m} metric-lookup-range-duration: ${METRIC_LOOKUP_RANGE:1h} subscription-sync-enabled: ${SUBSCRIPTION_SYNC_ENABLED:false} subscription: diff --git a/src/test/java/org/candlepin/subscriptions/tally/job/CaptureSnapshotsTaskManagerTest.java b/src/test/java/org/candlepin/subscriptions/tally/job/CaptureSnapshotsTaskManagerTest.java index d16db61232..d63cf97f5f 100644 --- a/src/test/java/org/candlepin/subscriptions/tally/job/CaptureSnapshotsTaskManagerTest.java +++ b/src/test/java/org/candlepin/subscriptions/tally/job/CaptureSnapshotsTaskManagerTest.java @@ -72,7 +72,7 @@ void testUpdateForSingleAccount() { String account = "12345"; manager.updateAccountSnapshots(account); - verify(queue).enqueue(eq(createDescriptor(account))); + verify(queue).enqueue(createDescriptor(account)); } @Test @@ -82,7 +82,7 @@ void ensureUpdateIsRunForEachAccount() throws Exception { manager.updateSnapshotsForAllAccounts(); - verify(queue, times(1)).enqueue(eq(createDescriptor(expectedAccounts))); + verify(queue, times(1)).enqueue(createDescriptor(expectedAccounts)); } @Test @@ -93,8 +93,8 @@ void ensureAccountListIsPartitionedWhenSendingTaskMessages() throws Exception { manager.updateSnapshotsForAllAccounts(); // NOTE: Partition size is defined in test.properties - verify(queue, times(1)).enqueue(eq(createDescriptor(Arrays.asList("a1", "a2")))); - verify(queue, times(1)).enqueue(eq(createDescriptor(Arrays.asList("a3", "a4")))); + verify(queue, times(1)).enqueue(createDescriptor(Arrays.asList("a1", "a2"))); + verify(queue, times(1)).enqueue(createDescriptor(Arrays.asList("a3", "a4"))); } @Test @@ -105,9 +105,9 @@ void ensureLastAccountListPartitionIsIncludedWhenSendingTaskMessages() throws Ex manager.updateSnapshotsForAllAccounts(); // NOTE: Partition size is defined in test.properties - verify(queue, times(1)).enqueue(eq(createDescriptor(Arrays.asList("a1", "a2")))); - verify(queue, times(1)).enqueue(eq(createDescriptor(Arrays.asList("a3", "a4")))); - verify(queue, times(1)).enqueue(eq(createDescriptor(Arrays.asList("a5")))); + verify(queue, times(1)).enqueue(createDescriptor(Arrays.asList("a1", "a2"))); + verify(queue, times(1)).enqueue(createDescriptor(Arrays.asList("a3", "a4"))); + verify(queue, times(1)).enqueue(createDescriptor(Arrays.asList("a5"))); } @Test @@ -117,14 +117,14 @@ void ensureErrorOnUpdateContinuesWithoutFailure() throws Exception { doThrow(new RuntimeException("Forced!")) .when(queue) - .enqueue(eq(createDescriptor(Arrays.asList("a3", "a4")))); + .enqueue(createDescriptor(Arrays.asList("a3", "a4"))); manager.updateSnapshotsForAllAccounts(); - verify(queue, times(1)).enqueue(eq(createDescriptor(Arrays.asList("a1", "a2")))); - verify(queue, times(1)).enqueue(eq(createDescriptor(Arrays.asList("a3", "a4")))); + verify(queue, times(1)).enqueue(createDescriptor(Arrays.asList("a1", "a2"))); + verify(queue, times(1)).enqueue(createDescriptor(Arrays.asList("a3", "a4"))); // Even though a3,a4 throws exception, a5,a6 should be enqueued. - verify(queue, times(1)).enqueue(eq(createDescriptor(Arrays.asList("a5", "a6")))); + verify(queue, times(1)).enqueue(createDescriptor(Arrays.asList("a5", "a6"))); } @Test @@ -176,8 +176,8 @@ void testHourlySnapshotTallyOffset() throws Exception { // 2019-05-24T12:35Z truncated to top of the hour - 4 hours prometheus latency // - 1 hour // tally latency - 1 hour metric range - .setSingleValuedArg("startDateTime", "2019-05-24T06:00:00Z") - .setSingleValuedArg("endDateTime", "2019-05-24T07:00:00Z") + .setSingleValuedArg("startDateTime", "2019-05-24T05:00:00Z") + .setSingleValuedArg("endDateTime", "2019-05-24T06:00:00Z") .build()); }); } diff --git a/templates/rhsm-subscriptions-scheduler.yml b/templates/rhsm-subscriptions-scheduler.yml index 31e60b8efc..076212f191 100644 --- a/templates/rhsm-subscriptions-scheduler.yml +++ b/templates/rhsm-subscriptions-scheduler.yml @@ -56,7 +56,7 @@ parameters: - name: SPLUNK_FORWARDER_CPU_LIMIT value: 100m - name: HOURLY_TALLY_OFFSET - value: 60m + value: 120m - name: TOKEN_REFRESHER_IMAGE value: quay.io/observatorium/token-refresher:master-2021-02-05-5da9663 - name: TOKEN_REFRESHER_CPU_REQUEST @@ -110,8 +110,6 @@ objects: value: ${KAFKA_BOOTSTRAP_HOST} - name: OPENSHIFT_METERING_RANGE value: ${OPENSHIFT_METERING_RANGE} - - name: HOURLY_TALLY_OFFSET - value: ${HOURLY_TALLY_OFFSET} - name: DATABASE_HOST valueFrom: secretKeyRef: @@ -392,6 +390,8 @@ objects: value: ${LOGGING_LEVEL} - name: KAFKA_BOOTSTRAP_HOST value: ${KAFKA_BOOTSTRAP_HOST} + - name: HOURLY_TALLY_OFFSET + value: ${HOURLY_TALLY_OFFSET} - name: DATABASE_HOST valueFrom: secretKeyRef: