From 526d6f8b172d2820aee218503d5a5b8c9d844dd9 Mon Sep 17 00:00:00 2001 From: JINSONG WANG Date: Tue, 28 May 2024 23:50:29 -0700 Subject: [PATCH 1/3] Sync with the metrics naming in Traceloop --- .../instana/dc/llm/impl/llm/MetricsCollectorService.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java b/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java index 5dfa13d..57b19cf 100644 --- a/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java +++ b/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java @@ -117,7 +117,9 @@ public void export( switch (metric.getDataCase()) { case SUM: if (metric.getName().compareTo("llm.watsonx.completions.tokens") == 0 || - metric.getName().compareTo("llm.openai.chat_completions.tokens") == 0) { + metric.getName().compareTo("llm.openai.chat_completions.tokens") == 0 || + metric.getName().compareTo("llm.anthropic.completion.tokens") == 0 || + metric.getName().compareTo("gen_ai.client.token.usage") == 0) { List sumDataPoints = metric.getSum().getDataPointsList(); for (NumberDataPoint dataPoint : sumDataPoints) { @@ -159,7 +161,9 @@ public void export( break; case HISTOGRAM: if (metric.getName().compareTo("llm.watsonx.completions.duration") == 0 || - metric.getName().compareTo("llm.openai.chat_completions.duration") == 0) { + metric.getName().compareTo("llm.openai.chat_completions.duration") == 0 || + metric.getName().compareTo("llm.anthropic.completion.duration") == 0 || + metric.getName().compareTo("gen_ai.client.operation.duration") == 0) { List histDataPoints = metric.getHistogram().getDataPointsList(); for (HistogramDataPoint dataPoint : histDataPoints) { From e0340e00dbd767033c52c1b3b668e634c5072620 Mon Sep 17 00:00:00 2001 From: JINSONG WANG Date: Mon, 3 Jun 2024 17:49:03 -0700 Subject: [PATCH 2/3] Update for keeping Traceloop sync and some minor fixes --- llm/README.md | 4 +- llm/config/config.yaml | 9 ++- .../java/com/instana/dc/llm/LLMDcUtil.java | 8 +- .../com/instana/dc/llm/impl/llm/LLMDc.java | 76 +++++++++++++------ .../llm/impl/llm/MetricsCollectorService.java | 73 +++++++++++++----- 5 files changed, 119 insertions(+), 51 deletions(-) diff --git a/llm/README.md b/llm/README.md index 3fffac2..256a788 100644 --- a/llm/README.md +++ b/llm/README.md @@ -30,8 +30,8 @@ vi config/config.yaml The following options are required: - `otel.backend.url`:The OTel gRPC address of the agent, for example: http://localhost:4317 - `otel.service.name`:The Data Collector name, which can be any string you choose. -- `price.prompt.tokens.per.kilo`:The unit price per thousand prompt tokens. -- `price.complete.tokens.per.kilo`:The unit price per thousand complete tokens. +- `*.price.prompt.tokens.per.kilo`:The unit price per thousand prompt tokens. +- `*.price.complete.tokens.per.kilo`:The unit price per thousand complete tokens. ## Run ODCL diff --git a/llm/config/config.yaml b/llm/config/config.yaml index 2f08bad..18b7b8a 100644 --- a/llm/config/config.yaml +++ b/llm/config/config.yaml @@ -4,5 +4,10 @@ instances: - otel.backend.url: http://localhost:4317 otel.service.name: DC1 otel.service.port: 8000 - price.prompt.tokens.per.kilo: 0.0 - price.complete.tokens.per.kilo: 0.0 + #Only configure the settings of the AI provider you are using + watsonx.price.prompt.tokens.per.kilo: 0.0 + watsonx.price.complete.tokens.per.kilo: 0.0 + openai.price.prompt.tokens.per.kilo: 0.0 + openai.price.complete.tokens.per.kilo: 0.0 + anthropic.price.prompt.tokens.per.kilo: 0.0 + anthropic.price.complete.tokens.per.kilo: 0.0 diff --git a/llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java b/llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java index 19e538a..e4647dd 100644 --- a/llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java +++ b/llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java @@ -15,8 +15,12 @@ public class LLMDcUtil { public static final String DEFAULT_INSTRUMENTATION_SCOPE_VER = "1.0.0"; public static final String SERVICE_NAME = "service.name"; public static final String SERVICE_INSTANCE_ID = "service.instance.id"; - public final static String PRICE_PROMPT_TOKES_PER_KILO = "price.prompt.tokens.per.kilo"; - public final static String PRICE_COMPLETE_TOKES_PER_KILO = "price.complete.tokens.per.kilo"; + public final static String WATSONX_PRICE_PROMPT_TOKES_PER_KILO = "watsonx.price.prompt.tokens.per.kilo"; + public final static String WATSONX_PRICE_COMPLETE_TOKES_PER_KILO = "watsonx.price.complete.tokens.per.kilo"; + public final static String OPENAI_PRICE_PROMPT_TOKES_PER_KILO = "openai.price.prompt.tokens.per.kilo"; + public final static String OPENAI_PRICE_COMPLETE_TOKES_PER_KILO = "openai.price.complete.tokens.per.kilo"; + public final static String ANTHROPIC_PRICE_PROMPT_TOKES_PER_KILO = "anthropic.price.prompt.tokens.per.kilo"; + public final static String ANTHROPIC_PRICE_COMPLETE_TOKES_PER_KILO = "anthropic.price.complete.tokens.per.kilo"; public final static String SERVICE_LISTEN_PORT = "otel.service.port"; /* Configurations for Metrics: diff --git a/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java b/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java index 381bafa..93c8349 100644 --- a/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java +++ b/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java @@ -29,8 +29,12 @@ public class LLMDc extends AbstractLLMDc { public static final String SENSOR_NAME = "com.instana.plugin.watsonx"; private HashMap modelAggrMap = new HashMap<>(); private MetricsCollectorService metricsCollector = new MetricsCollectorService(); - private Double pricePromptTokens = 0.0; - private Double priceCompleteTokens = 0.0; + private Double watsonxPricePromptTokens = 0.0; + private Double watsonxPriceCompleteTokens = 0.0; + private Double openaiPricePromptTokens = 0.0; + private Double openaiPriceCompleteTokens = 0.0; + private Double anthropicPricePromptTokens = 0.0; + private Double anthropicPriceCompleteTokens = 0.0; private int listenPort = 0; /** @@ -40,7 +44,7 @@ public class LLMDc extends AbstractLLMDc { private class ModelAggregation { private final String modelId; - private final String userId; + private final String aiSystem; private int deltaPromptTokens; private int deltaCompleteTokens; private int deltaDuration; @@ -51,15 +55,15 @@ private class ModelAggregation { private int lastTotalDuration; private int lastTotalReqCount; - public ModelAggregation(String modelId, String userId) { + public ModelAggregation(String modelId, String aiSystem) { this.modelId = modelId; - this.userId = userId; + this.aiSystem = aiSystem; } public String getModelId() { return modelId; } - public String getUserId() { - return userId; + public String getAiSystem() { + return aiSystem; } public void addDeltaPromptTokens(int currTokens) { if(currTokens == 0) { @@ -136,8 +140,12 @@ public void resetMetrics() { public LLMDc(Map properties, CustomDcConfig cdcConfig) throws Exception { super(properties, cdcConfig); - pricePromptTokens = (Double) properties.getOrDefault(PRICE_PROMPT_TOKES_PER_KILO, 0.0); - priceCompleteTokens = (Double) properties.getOrDefault(PRICE_COMPLETE_TOKES_PER_KILO, 0.0); + watsonxPricePromptTokens = (Double) properties.getOrDefault(WATSONX_PRICE_PROMPT_TOKES_PER_KILO, 0.0); + watsonxPriceCompleteTokens = (Double) properties.getOrDefault(WATSONX_PRICE_COMPLETE_TOKES_PER_KILO, 0.0); + openaiPricePromptTokens = (Double) properties.getOrDefault(OPENAI_PRICE_PROMPT_TOKES_PER_KILO, 0.0); + openaiPriceCompleteTokens = (Double) properties.getOrDefault(OPENAI_PRICE_COMPLETE_TOKES_PER_KILO, 0.0); + anthropicPricePromptTokens = (Double) properties.getOrDefault(ANTHROPIC_PRICE_PROMPT_TOKES_PER_KILO, 0.0); + anthropicPriceCompleteTokens = (Double) properties.getOrDefault(ANTHROPIC_PRICE_COMPLETE_TOKES_PER_KILO, 0.0); listenPort = (int) properties.getOrDefault(SERVICE_LISTEN_PORT, 8000); } @@ -192,10 +200,11 @@ public void collectData() { long completeTokens = metric.getCompleteTokens(); double duration = metric.getDuration(); long requestCount = metric.getReqCount(); + String aiSystem = metric.getAiSystem(); ModelAggregation modelAggr = modelAggrMap.get(modelId); if (modelAggr == null) { - modelAggr = new ModelAggregation(modelId, "llmUser"); + modelAggr = new ModelAggregation(modelId, aiSystem); modelAggrMap.put(modelId, modelAggr); } @@ -208,11 +217,11 @@ public void collectData() { } } - System.out.println("-----------------------------------------"); + logger.info("-----------------------------------------"); for(Map.Entry entry : modelAggrMap.entrySet()){ ModelAggregation aggr = entry.getValue(); String modelId = aggr.getModelId(); - String userId = aggr.getUserId(); + String aiSystem = aggr.getAiSystem(); int deltaRequestCount = aggr.getDeltaReqCount(); int deltaDuration = aggr.getDeltaDuration(); int deltaPromptTokens = aggr.getDeltaPromptTokens(); @@ -220,26 +229,45 @@ public void collectData() { int maxDuration = aggr.getMaxDuration(); int avgDuration = deltaDuration/(deltaRequestCount==0?1:deltaRequestCount); - double intervalReqCount = (double)deltaRequestCount/LLM_POLL_INTERVAL; - double intervalPromptTokens = (double)deltaPromptTokens/LLM_POLL_INTERVAL; - double intervalCompleteTokens = (double)deltaCompleteTokens/LLM_POLL_INTERVAL; + + int intervalSeconds = LLM_POLL_INTERVAL; + String agentLess = System.getenv("AGENTLESS_MODE_ENABLED"); + if (agentLess != null) { + intervalSeconds = 1; + } + + double pricePromptTokens = 0.0; + double priceCompleteTokens = 0.0; + if (aiSystem.compareTo("watsonx") == 0) { + pricePromptTokens = watsonxPricePromptTokens; + priceCompleteTokens = watsonxPriceCompleteTokens; + } else if (aiSystem.compareTo("openai") == 0) { + pricePromptTokens = openaiPricePromptTokens; + priceCompleteTokens = openaiPriceCompleteTokens; + } else if (aiSystem.compareTo("anthropic") == 0) { + pricePromptTokens = anthropicPricePromptTokens; + priceCompleteTokens = anthropicPriceCompleteTokens; + } + double intervalReqCount = (double)deltaRequestCount/intervalSeconds; + double intervalPromptTokens = (double)deltaPromptTokens/intervalSeconds; + double intervalCompleteTokens = (double)deltaCompleteTokens/intervalSeconds; double intervalTotalTokens = intervalPromptTokens + intervalCompleteTokens; double intervalPromptCost = (intervalPromptTokens/1000) * pricePromptTokens; double intervalCompleteCost = (intervalCompleteTokens/1000) * priceCompleteTokens; double intervalTotalCost = intervalPromptCost + intervalCompleteCost; aggr.resetMetrics(); - System.out.println("ModelId : " + modelId); - System.out.println("UserId : " + userId); - System.out.println("AvgDuration : " + avgDuration); - System.out.println("MaxDuration : " + maxDuration); - System.out.println("IntervalTokens : " + intervalTotalTokens); - System.out.println("IntervalCost : " + intervalTotalCost); - System.out.println("IntervalRequest : " + intervalReqCount); + logger.info("ModelId : " + modelId); + logger.info("AiSystem : " + aiSystem); + logger.info("AvgDuration : " + avgDuration); + logger.info("MaxDuration : " + maxDuration); + logger.info("IntervalTokens : " + intervalTotalTokens); + logger.info("IntervalCost : " + intervalTotalCost); + logger.info("IntervalRequest : " + intervalReqCount); Map attributes = new HashMap<>(); attributes.put("model_id", modelId); - attributes.put("user_id", userId); + attributes.put("ai_system", aiSystem); getRawMetric(LLM_STATUS_NAME).setValue(1); getRawMetric(LLM_DURATION_NAME).getDataPoint(modelId).setValue(avgDuration, attributes); getRawMetric(LLM_DURATION_MAX_NAME).getDataPoint(modelId).setValue(maxDuration, attributes); @@ -247,6 +275,6 @@ public void collectData() { getRawMetric(LLM_TOKEN_NAME).getDataPoint(modelId).setValue(intervalTotalTokens, attributes); getRawMetric(LLM_REQ_COUNT_NAME).getDataPoint(modelId).setValue(intervalReqCount, attributes); } - System.out.println("-----------------------------------------"); + logger.info("-----------------------------------------"); } } diff --git a/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java b/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java index 57b19cf..04690af 100644 --- a/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java +++ b/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java @@ -17,8 +17,10 @@ import java.util.List; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingDeque; +import java.util.logging.Logger; class MetricsCollectorService extends MetricsServiceGrpc.MetricsServiceImplBase { + private static final Logger logger = Logger.getLogger(MetricsCollectorService.class.getName()); private Object mutex = new Object(); @@ -28,6 +30,7 @@ public class OtelMetric { private long completeTokens; private double duration; private long requestCount; + private String aiSystem; public String getModelId() { return modelId; @@ -49,6 +52,10 @@ public long getReqCount() { return requestCount; } + public String getAiSystem() { + return aiSystem; + } + public void setModelId(String modelId) { this.modelId = modelId; } @@ -68,6 +75,10 @@ public void setDuration(double duration) { public void setReqCount(long requestCount) { this.requestCount = requestCount; } + + public void setAiSystem(String aiSystem) { + this.aiSystem = aiSystem; + } } private final BlockingQueue exportMetrics = new LinkedBlockingDeque<>(); @@ -87,7 +98,7 @@ public void export( ExportMetricsServiceRequest request, StreamObserver responseObserver) { - System.out.println("--------------------------------------------------------"); + logger.info("--------------------------------------------------------"); synchronized (mutex) { @@ -96,23 +107,23 @@ public void export( Resource resource = resourceMetrics.getResource(); for (KeyValue reskv : resource.getAttributesList()) { - System.out.println("Received metric --- Resource attrKey: " + reskv.getKey()); - System.out.println("Received metric --- Resource attrVal: " + reskv.getValue().getStringValue()); + logger.info("Received metric --- Resource attrKey: " + reskv.getKey()); + logger.info("Received metric --- Resource attrVal: " + reskv.getValue().getStringValue()); } for (ScopeMetrics scoMetrics : resourceMetrics.getScopeMetricsList()) { InstrumentationScope instrumentationScope = scoMetrics.getScope(); instrumentationScope.getAttributesList(); for (KeyValue inskv : instrumentationScope.getAttributesList()) { - System.out.println("Received metric --- Scope attrKey: " + inskv.getKey()); - System.out.println("Received metric --- Scope attrVal: " + inskv.getValue().getStringValue()); + logger.info("Received metric --- Scope attrKey: " + inskv.getKey()); + logger.info("Received metric --- Scope attrVal: " + inskv.getValue().getStringValue()); } for (Metric metric : scoMetrics.getMetricsList()) { - System.out.println("Received metric --- Scope Name: " + metric.getName()); - System.out.println("Received metric --- Scope Desc: " + metric.getDescription()); - System.out.println("Received metric --- Scope Unit: " + metric.getUnit()); - System.out.println("Received metric --- Scope Case: " + metric.getDataCase().getNumber()); + logger.info("Received metric --- Scope Name: " + metric.getName()); + logger.info("Received metric --- Scope Desc: " + metric.getDescription()); + logger.info("Received metric --- Scope Unit: " + metric.getUnit()); + logger.info("Received metric --- Scope Case: " + metric.getDataCase().getNumber()); switch (metric.getDataCase()) { case SUM: @@ -128,30 +139,40 @@ public void export( String modelId = ""; String tokenType = ""; + String aiSystem = ""; for (KeyValue kv : kvList) { - System.out.println("Received metric --- Tokens attrKey: " + kv.getKey()); - System.out.println("Received metric --- Tokens attrVal: " + logger.info("Received metric --- Tokens attrKey: " + kv.getKey()); + logger.info("Received metric --- Tokens attrVal: " + kv.getValue().getStringValue()); if (kv.getKey().compareTo("llm.response.model") == 0 || kv.getKey().compareTo("gen_ai.response.model") == 0) { modelId = kv.getValue().getStringValue(); - } else if (kv.getKey().compareTo("llm.usage.token_type") == 0) { + } else if (kv.getKey().compareTo("llm.usage.token_type") == 0 || kv.getKey().compareTo("gen_ai.token.type") == 0) { tokenType = kv.getValue().getStringValue(); + } else if (kv.getKey().compareTo("gen_ai.system") == 0) { + aiSystem = kv.getValue().getStringValue(); } } + if (aiSystem.isEmpty() && metric.getName().compareTo("gen_ai.client.token.usage") != 0) { + String[] parts = metric.getName().split("\\.", 3); + aiSystem = parts[1]; + } else { + aiSystem = "n/a"; + } long promptTokens = 0; long completeTokens = 0; - if (tokenType.compareTo("prompt") == 0) { + if (tokenType.compareTo("prompt") == 0 || tokenType.compareTo("input") == 0) { promptTokens = dataPoint.getAsInt(); - System.out.println("Received metric --- Prompt Value: " + promptTokens); - } else if (tokenType.compareTo("completion") == 0) { + logger.info("Received metric --- Prompt Value: " + promptTokens); + } else if (tokenType.compareTo("completion") == 0 || tokenType.compareTo("output") == 0) { completeTokens = dataPoint.getAsInt(); - System.out.println("Received metric --- Complete Value: " + completeTokens); + logger.info("Received metric --- Complete Value: " + completeTokens); } if (!modelId.isEmpty()) { OtelMetric otelMetric = new OtelMetric(); otelMetric.setModelId(modelId); + otelMetric.setAiSystem(aiSystem); otelMetric.setPromptTokens(promptTokens); otelMetric.setCompleteTokens(completeTokens); exportMetrics.add(otelMetric); @@ -171,23 +192,33 @@ public void export( List kvList = dataPoint.getAttributesList(); String modelId = ""; + String aiSystem = ""; for (KeyValue kv : kvList) { - System.out.println("Received metric --- Duration attrKey: " + kv.getKey()); - System.out.println("Received metric --- Duration attrVal: " + logger.info("Received metric --- Duration attrKey: " + kv.getKey()); + logger.info("Received metric --- Duration attrVal: " + kv.getValue().getStringValue()); if (kv.getKey().compareTo("llm.response.model") == 0 || kv.getKey().compareTo("gen_ai.response.model") == 0) { modelId = kv.getValue().getStringValue(); + } else if (kv.getKey().compareTo("gen_ai.system") == 0) { + aiSystem = kv.getValue().getStringValue(); } } + if (aiSystem.isEmpty() && metric.getName().compareTo("gen_ai.client.token.usage") != 0) { + String[] parts = metric.getName().split("\\.", 3); + aiSystem = parts[1]; + } else { + aiSystem = "n/a"; + } Double durationSum = dataPoint.getSum(); long requestCount = dataPoint.getCount(); - System.out.println("Received metric --- Duration Sum Value: " + durationSum); - System.out.println("Received metric --- Duration Count Value: " + requestCount); + logger.info("Received metric --- Duration Sum Value: " + durationSum); + logger.info("Received metric --- Duration Count Value: " + requestCount); if (!modelId.isEmpty()) { OtelMetric otelMetric = new OtelMetric(); otelMetric.setModelId(modelId); + otelMetric.setAiSystem(aiSystem); otelMetric.setDuration(durationSum); otelMetric.setReqCount(requestCount); exportMetrics.add(otelMetric); @@ -198,7 +229,7 @@ public void export( case GAUGE: case SUMMARY: default: - System.out.println("Unsupported metric DataCase: " + metric.getDataCase()); + logger.info("Unsupported metric DataCase: " + metric.getDataCase()); throw new AssertionError("Unsupported metric DataCase: " + metric.getDataCase()); } } From 437e5a0e3a8f0bfdf630d8c28139e45f9b9f2216 Mon Sep 17 00:00:00 2001 From: JINSONG WANG Date: Tue, 4 Jun 2024 02:15:22 -0700 Subject: [PATCH 3/3] Use long as integer type and refactor some code --- .../com/instana/dc/llm/impl/llm/LLMDc.java | 128 +++++++++++------- .../llm/impl/llm/MetricsCollectorService.java | 10 +- 2 files changed, 89 insertions(+), 49 deletions(-) diff --git a/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java b/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java index 93c8349..91428d5 100644 --- a/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java +++ b/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java @@ -45,15 +45,15 @@ public class LLMDc extends AbstractLLMDc { private class ModelAggregation { private final String modelId; private final String aiSystem; - private int deltaPromptTokens; - private int deltaCompleteTokens; - private int deltaDuration; - private int deltaReqCount; - private int maxDuration; - private int lastTotalPromptTokens; - private int lastTotalCompleteTokens; - private int lastTotalDuration; - private int lastTotalReqCount; + private long deltaPromptTokens; + private long deltaCompleteTokens; + private long deltaDuration; + private long deltaReqCount; + private long maxDuration; + private long lastTotalPromptTokens; + private long lastTotalCompleteTokens; + private long lastTotalDuration; + private long lastTotalReqCount; public ModelAggregation(String modelId, String aiSystem) { this.modelId = modelId; @@ -65,75 +65,84 @@ public String getModelId() { public String getAiSystem() { return aiSystem; } - public void addDeltaPromptTokens(int currTokens) { + public void addDeltaPromptTokens(long currTokens, long reqCount) { if(currTokens == 0) { return; } - int diffPromptTokens = 0; - if(currTokens > lastTotalPromptTokens && lastTotalPromptTokens != 0) { + long diffPromptTokens = 0; + if(reqCount == 1) { + diffPromptTokens = currTokens; + } else if(currTokens > lastTotalPromptTokens && lastTotalPromptTokens != 0) { diffPromptTokens = currTokens - lastTotalPromptTokens; } lastTotalPromptTokens = currTokens; deltaPromptTokens += diffPromptTokens; } - public int getDeltaPromptTokens() { + public long getDeltaPromptTokens() { return deltaPromptTokens; } - public void addDeltaCompleteTokens(int currTokens) { + public void addDeltaCompleteTokens(long currTokens, long reqCount) { if(currTokens == 0) { return; } - int diffCompleteTokens = 0; - if(currTokens > lastTotalCompleteTokens && lastTotalCompleteTokens != 0) { + long diffCompleteTokens = 0; + if(reqCount == 1) { + diffCompleteTokens = currTokens; + } else if(currTokens > lastTotalCompleteTokens && lastTotalCompleteTokens != 0) { diffCompleteTokens = currTokens - lastTotalCompleteTokens; } lastTotalCompleteTokens = currTokens; deltaCompleteTokens += diffCompleteTokens; } - public int getDeltaCompleteTokens() { + public long getDeltaCompleteTokens() { return deltaCompleteTokens; } - public void addDeltaDuration(int currDuration) { + public void addDeltaDuration(long currDuration, long reqCount) { if(currDuration == 0) { return; } - int diffDuration = 0; - if(currDuration > lastTotalDuration && lastTotalDuration != 0) { + long diffDuration = 0; + if(reqCount == 1) { + diffDuration = currDuration; + } else if(currDuration > lastTotalDuration && lastTotalDuration != 0) { diffDuration = currDuration - lastTotalDuration; } lastTotalDuration = currDuration; deltaDuration += diffDuration; - - if(deltaDuration > maxDuration) { - maxDuration = deltaDuration; - } } - public int getDeltaDuration() { + public long getDeltaDuration() { return deltaDuration; } - public int getMaxDuration() { + public void setMaxDuration(long maxDuration) { + this.maxDuration = maxDuration; + } + public long getMaxDuration() { return maxDuration; } - public void addDeltaReqCount(int currCount) { + public void addDeltaReqCount(long currCount) { if(currCount == 0) { return; } - int diffReqCount = 0; - if(currCount > lastTotalReqCount && lastTotalReqCount != 0) { + long diffReqCount = 0; + if(currCount == 1) { + diffReqCount = currCount; + } else if(currCount > lastTotalReqCount && lastTotalReqCount != 0) { diffReqCount = currCount - lastTotalReqCount; } lastTotalReqCount = currCount; deltaReqCount += diffReqCount; } - public int getDeltaReqCount() { + public long getDeltaReqCount() { return deltaReqCount; } + public long getCurrentReqCount() { + return lastTotalReqCount; + } public void resetMetrics() { deltaPromptTokens = 0; deltaCompleteTokens = 0; deltaDuration = 0; - maxDuration = 0; deltaReqCount = 0; } } @@ -195,23 +204,46 @@ public void collectData() { metricsCollector.clearMetrics(); for (OtelMetric metric : otelMetrics) { try { - String modelId = metric.getModelId(); - long promptTokens = metric.getPromtTokens(); - long completeTokens = metric.getCompleteTokens(); double duration = metric.getDuration(); - long requestCount = metric.getReqCount(); + if(duration == 0.0) { + continue; + } + String modelId = metric.getModelId(); String aiSystem = metric.getAiSystem(); + long requestCount = metric.getReqCount(); ModelAggregation modelAggr = modelAggrMap.get(modelId); if (modelAggr == null) { modelAggr = new ModelAggregation(modelId, aiSystem); modelAggrMap.put(modelId, modelAggr); } - - modelAggr.addDeltaPromptTokens((int)(promptTokens)); - modelAggr.addDeltaCompleteTokens((int)(completeTokens)); - modelAggr.addDeltaDuration((int)(duration*1000)); - modelAggr.addDeltaReqCount((int)(requestCount)); + modelAggr.addDeltaDuration((long)(duration*1000), requestCount); + modelAggr.addDeltaReqCount(requestCount); + } catch (Exception e) { + e.printStackTrace(); + } + } + for (OtelMetric metric : otelMetrics) { + try { + String modelId = metric.getModelId(); + String aiSystem = metric.getAiSystem(); + long promptTokens = metric.getPromtTokens(); + long completeTokens = metric.getCompleteTokens(); + if(promptTokens == 0 && completeTokens == 0) { + continue; + } + ModelAggregation modelAggr = modelAggrMap.get(modelId); + if (modelAggr == null) { + modelAggr = new ModelAggregation(modelId, aiSystem); + modelAggrMap.put(modelId, modelAggr); + } + long currentReqCount = modelAggr.getCurrentReqCount(); + if(promptTokens > 0) { + modelAggr.addDeltaPromptTokens(promptTokens, currentReqCount); + } + if(completeTokens > 0) { + modelAggr.addDeltaCompleteTokens(completeTokens, currentReqCount); + } } catch (Exception e) { e.printStackTrace(); } @@ -222,13 +254,17 @@ public void collectData() { ModelAggregation aggr = entry.getValue(); String modelId = aggr.getModelId(); String aiSystem = aggr.getAiSystem(); - int deltaRequestCount = aggr.getDeltaReqCount(); - int deltaDuration = aggr.getDeltaDuration(); - int deltaPromptTokens = aggr.getDeltaPromptTokens(); - int deltaCompleteTokens = aggr.getDeltaCompleteTokens(); - int maxDuration = aggr.getMaxDuration(); + long deltaRequestCount = aggr.getDeltaReqCount(); + long deltaDuration = aggr.getDeltaDuration(); + long deltaPromptTokens = aggr.getDeltaPromptTokens(); + long deltaCompleteTokens = aggr.getDeltaCompleteTokens(); + long maxDuration = aggr.getMaxDuration(); - int avgDuration = deltaDuration/(deltaRequestCount==0?1:deltaRequestCount); + long avgDuration = deltaDuration/(deltaRequestCount==0?1:deltaRequestCount); + if(avgDuration > maxDuration) { + maxDuration = avgDuration; + aggr.setMaxDuration(maxDuration); + } int intervalSeconds = LLM_POLL_INTERVAL; String agentLess = System.getenv("AGENTLESS_MODE_ENABLED"); diff --git a/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java b/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java index 04690af..3ba3b18 100644 --- a/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java +++ b/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java @@ -173,8 +173,12 @@ public void export( OtelMetric otelMetric = new OtelMetric(); otelMetric.setModelId(modelId); otelMetric.setAiSystem(aiSystem); - otelMetric.setPromptTokens(promptTokens); - otelMetric.setCompleteTokens(completeTokens); + if(promptTokens > 0) { + otelMetric.setPromptTokens(promptTokens); + } + if(completeTokens > 0) { + otelMetric.setCompleteTokens(completeTokens); + } exportMetrics.add(otelMetric); } } @@ -240,4 +244,4 @@ public void export( responseObserver.onNext(ExportMetricsServiceResponse.getDefaultInstance()); responseObserver.onCompleted(); } -} \ No newline at end of file +}