diff --git a/.env_file b/.env_file
index d943f71b0..15b7a209a 100644
--- a/.env_file
+++ b/.env_file
@@ -14,3 +14,9 @@ MON_PORT=8765
YDB_ENDPOINT=grpc://localhost:2136
YDB_DATABASE=/local
YDB_ANONYMOUS_CREDENTIALS=1
+CLICKHOUSE_DB=test
+CLICKHOUSE_USER=username
+CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1
+CLICKHOUSE_PASSWORD=pass
+SUPERSET_USERNAME=superset
+SUPERSET_PASSWORD=superset
\ No newline at end of file
diff --git a/.gitattributes b/.gitattributes
index 7707509be..f7a91262a 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,4 +1,6 @@
+
* text=auto
+*.png binary
+*.jpg binary
*.py text eol=lf
-*.sh text eol=lf
-* text working-tree-encoding=UTF-8
\ No newline at end of file
+*.sh text eol=lf
\ No newline at end of file
diff --git a/.github/workflows/update_dashboard.yml b/.github/workflows/update_dashboard.yml
new file mode 100644
index 000000000..6beeadd62
--- /dev/null
+++ b/.github/workflows/update_dashboard.yml
@@ -0,0 +1,47 @@
+name: update_dashboard
+
+on:
+  push:
+    branches:
+      - 'dev'
+    paths:
+      - 'dff/utils/docker/dockerfile_stats'
+      - 'dff/utils/docker/entrypoint_stats.sh'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+
+jobs:
+  build_and_publish_dashboard:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Log in to container registry
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract Docker metadata
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          images: ghcr.io/${{ github.repository }}/superset_df_dashboard
+          tags: |
+            type=ref,event=branch
+            type=semver,pattern={{version}}
+
+      - name: Build and upload image
+        uses: docker/build-push-action@v4
+        with:
+          file: dff/utils/docker/dockerfile_stats
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
\ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
index 8bd17ef6f..2bf8bfe27 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,6 +1,7 @@
include CONTRIBUTING.md
include LICENSE
include README.md
+graft dff/config/superset_dashboard
include dff/context_storages/protocols.json
exclude makefile
diff --git a/dff/config/README.md b/dff/config/README.md
new file mode 100644
index 000000000..f060b14d6
--- /dev/null
+++ b/dff/config/README.md
@@ -0,0 +1,10 @@
+# Superset dashboard config
+
+## Description
+
+This directory provides yaml files for Superset dashboard configuration.
+The files inside are not supposed to be edited manually, as manual edits may break compatibility.
+Placeholders inside the files will be filled automatically when you use the
+`dff.stats` CLI command to generate a configuration archive.
+
+Use `dff.stats -h` for more info.
\ No newline at end of file
diff --git a/dff/config/superset_dashboard/charts/Flow_visit_ratio_monitor_1.yaml b/dff/config/superset_dashboard/charts/Flow_visit_ratio_monitor_1.yaml
new file mode 100644
index 000000000..4fb4c2124
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Flow_visit_ratio_monitor_1.yaml
@@ -0,0 +1,78 @@
+slice_name: Flow visit ratio monitor
+description: null
+certified_by: null
+certification_details: null
+viz_type: echarts_timeseries_bar
+params:
+ datasource: 2__table
+ viz_type: echarts_timeseries_bar
+ slice_id: 1
+ granularity_sqla: start_time
+ time_grain_sqla: null
+ time_range: No filter
+ metrics:
+ - aggregate: COUNT
+ column:
+ advanced_data_type: null
+ certification_details: null
+ certified_by: null
+ column_name: context_id
+ description: null
+ expression: null
+ filterable: true
+ groupby: true
+ id: 1
+ is_certified: false
+ is_dttm: false
+ python_date_format: null
+ type: STRING
+ type_generic: 1
+ verbose_name: null
+ warning_markdown: null
+ expressionType: SIMPLE
+ hasCustomLabel: false
+ isNew: false
+ label: COUNT(context_id)
+ optionName: metric_waxxrrnkwwm_zudaex5z8bh
+ sqlExpression: null
+ groupby:
+ - flow_label
+ contributionMode: column
+ adhoc_filters: []
+ order_desc: true
+ row_limit: 10000
+ truncate_metric: true
+ show_empty_columns: true
+ comparison_type: values
+ annotation_layers: []
+ forecastPeriods: 10
+ forecastInterval: 0.8
+ orientation: vertical
+ x_axis_title_margin: 15
+ y_axis_title: ''
+ y_axis_title_margin: 50
+ y_axis_title_position: Left
+ color_scheme: supersetColors
+ stack: true
+ only_total: true
+ zoomable: true
+ show_legend: true
+ legendType: scroll
+ legendOrientation: top
+ x_axis_time_format: smart_date
+ y_axis_format: SMART_NUMBER
+ y_axis_bounds:
+ - null
+ - null
+ rich_tooltip: true
+ tooltipTimeFormat: smart_date
+ extra_form_data: {}
+ dashboards:
+ - 1
+query_context: '{"datasource":{"id":2,"type":"table"},"force":false,"queries":[{"time_range":"No
+ filter","granularity":"start_time","filters":[],"extras":{"time_grain_sqla":null,"having":"","where":""},"applied_time_extras":{},"columns":["flow_label"],"metrics":[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_waxxrrnkwwm_zudaex5z8bh","sqlExpression":null}],"orderby":[[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_waxxrrnkwwm_zudaex5z8bh","sqlExpression":null},false]],"annotation_layers":[],"row_limit":10000,"series_columns":["flow_label"],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"is_timeseries":true,"time_offsets":[],"post_processing":[{"operation":"pivot","options":{"index":["__timestamp"],"columns":["flow_label"],"aggregates":{"COUNT(context_id)":{"operator":"mean"}},"drop_missing_columns":false}},{"operation":"rename","options":{"columns":{"COUNT(context_id)":null},"level":0,"inplace":true}},{"operation":"contribution","options":{"orientation":"column"}},{"operation":"flatten"}]}],"form_data":{"datasource":"2__table","viz_type":"echarts_timeseries_bar","slice_id":1,"granularity_sqla":"start_time","time_grain_sqla":null,"time_range":"No
+ filter","metrics":[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_waxxrrnkwwm_zudaex5z8bh","sqlExpression":null}],"groupby":["flow_label"],"contributionMode":"column","adhoc_filters":[],"order_desc":true,"row_limit":10000,"truncate_metric":true,"show_empty_columns":true,"comparison_type":"values","annotation_layers":[],"forecastPeriods":10,"forecastInterval":0.8,"orientation":"vertical","x_axis_title_margin":15,"y_axis_title":"","y_axis_title_margin":50,"y_axis_title_position":"Left","color_scheme":"supersetColors","stack":true,"only_total":true,"zoomable":true,"show_legend":true,"legendType":"scroll","legendOrientation":"top","x_axis_time_format":"smart_date","y_axis_format":"SMART_NUMBER","y_axis_bounds":[null,null],"rich_tooltip":true,"tooltipTimeFormat":"smart_date","extra_form_data":{},"dashboards":[1],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
+cache_timeout: null
+uuid: ba02528b-184b-4304-b027-f2b7d9011ab0
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/charts/Node_Visits_2.yaml b/dff/config/superset_dashboard/charts/Node_Visits_2.yaml
new file mode 100644
index 000000000..802288e28
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Node_Visits_2.yaml
@@ -0,0 +1,59 @@
+slice_name: Node Visits
+description: null
+certified_by: null
+certification_details: null
+viz_type: dist_bar
+params:
+ adhoc_filters: []
+ bottom_margin: auto
+ color_scheme: supersetColors
+ columns:
+ - label
+ datasource: 3__table
+ extra_form_data: {}
+ granularity_sqla: start_time
+ groupby:
+ - request_id
+ metrics:
+ - aggregate: COUNT
+ column:
+ advanced_data_type: null
+ certification_details: null
+ certified_by: null
+ column_name: context_id
+ description: null
+ expression: null
+ filterable: true
+ groupby: true
+ id: 20
+ is_certified: false
+ is_dttm: false
+ python_date_format: null
+ type: STRING
+ type_generic: 1
+ verbose_name: null
+ warning_markdown: null
+ expressionType: SIMPLE
+ hasCustomLabel: false
+ isNew: false
+ label: COUNT(context_id)
+ optionName: metric_l2mle87zvnb_nfqzdxmig1d
+ sqlExpression: null
+ order_desc: true
+ rich_tooltip: true
+ row_limit: 10000
+ show_legend: true
+ time_range: No filter
+ viz_type: dist_bar
+ x_axis_label: History id
+ x_ticks_layout: auto
+ y_axis_bounds:
+ - null
+ - null
+ y_axis_format: SMART_NUMBER
+ y_axis_label: Node visits
+query_context: null
+cache_timeout: null
+uuid: 44f4ab9d-5072-4926-a6ed-8615fb81b3d0
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/charts/Node_counts_3.yaml b/dff/config/superset_dashboard/charts/Node_counts_3.yaml
new file mode 100644
index 000000000..8cf476c2f
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Node_counts_3.yaml
@@ -0,0 +1,59 @@
+slice_name: Node counts
+description: null
+certified_by: null
+certification_details: null
+viz_type: dist_bar
+params:
+ adhoc_filters: []
+ bar_stacked: true
+ bottom_margin: auto
+ color_scheme: supersetColors
+ columns:
+ - flow_label
+ datasource: 1__table
+ extra_form_data: {}
+ granularity_sqla: start_time
+ groupby:
+ - label
+ metrics:
+ - aggregate: COUNT_DISTINCT
+ column:
+ advanced_data_type: null
+ certification_details: null
+ certified_by: null
+ column_name: context_id
+ description: null
+ expression: null
+ filterable: true
+ groupby: true
+ id: 1
+ is_certified: false
+ is_dttm: false
+ python_date_format: null
+ type: STRING
+ type_generic: 1
+ verbose_name: null
+ warning_markdown: null
+ expressionType: SIMPLE
+ hasCustomLabel: false
+ isNew: false
+ label: COUNT_DISTINCT(context_id)
+ optionName: metric_axee7fzlpu_upud0bdjv6
+ sqlExpression: null
+ order_bars: true
+ order_desc: true
+ rich_tooltip: true
+ row_limit: 10000
+ show_legend: true
+ time_range: No filter
+ viz_type: dist_bar
+ x_ticks_layout: auto
+ y_axis_bounds:
+ - null
+ - null
+ y_axis_format: SMART_NUMBER
+query_context: null
+cache_timeout: null
+uuid: 0c47c7b5-f500-46cb-97e3-9ebb637f0c8a
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/charts/Node_visit_ratio_monitor_4.yaml b/dff/config/superset_dashboard/charts/Node_visit_ratio_monitor_4.yaml
new file mode 100644
index 000000000..93c1bf239
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Node_visit_ratio_monitor_4.yaml
@@ -0,0 +1,86 @@
+slice_name: Node visit ratio monitor
+description: null
+certified_by: null
+certification_details: null
+viz_type: echarts_timeseries_bar
+params:
+ datasource: 2__table
+ viz_type: echarts_timeseries_bar
+ slice_id: 4
+ granularity_sqla: start_time
+ time_grain_sqla: null
+ time_range: No filter
+ metrics:
+ - aggregate: COUNT
+ column:
+ advanced_data_type: null
+ certification_details: null
+ certified_by: null
+ column_name: context_id
+ description: null
+ expression: null
+ filterable: true
+ groupby: true
+ id: 1
+ is_certified: false
+ is_dttm: false
+ python_date_format: null
+ type: STRING
+ type_generic: 1
+ verbose_name: null
+ warning_markdown: null
+ expressionType: SIMPLE
+ hasCustomLabel: false
+ isNew: false
+ label: COUNT(context_id)
+ optionName: metric_9yefk3wj2g_5wbp61n0pyr
+ sqlExpression: null
+ groupby:
+ - flow_label
+ - node_label
+ contributionMode: column
+ adhoc_filters: []
+ order_desc: true
+ row_limit: 10000
+ truncate_metric: true
+ show_empty_columns: true
+ comparison_type: values
+ annotation_layers: []
+ forecastPeriods: 10
+ forecastInterval: 0.8
+ orientation: vertical
+ x_axis_title: Datetime
+ x_axis_title_margin: 30
+ y_axis_title: Node visit ratio
+ y_axis_title_margin: 50
+ y_axis_title_position: Left
+ color_scheme: supersetColors
+ stack: true
+ only_total: true
+ zoomable: true
+ show_legend: true
+ legendType: scroll
+ legendOrientation: top
+ x_axis_time_format: smart_date
+ xAxisLabelRotation: 45
+ y_axis_format: SMART_NUMBER
+ logAxis: false
+ minorSplitLine: false
+ truncateYAxis: false
+ y_axis_bounds:
+ - null
+ - null
+ rich_tooltip: true
+ tooltipSortByMetric: true
+ tooltipTimeFormat: smart_date
+ extra_form_data: {}
+ dashboards:
+ - 1
+query_context: '{"datasource":{"id":2,"type":"table"},"force":false,"queries":[{"time_range":"No
+ filter","granularity":"start_time","filters":[],"extras":{"time_grain_sqla":null,"having":"","where":""},"applied_time_extras":{},"columns":["flow_label","node_label"],"metrics":[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_9yefk3wj2g_5wbp61n0pyr","sqlExpression":null}],"orderby":[[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_9yefk3wj2g_5wbp61n0pyr","sqlExpression":null},false]],"annotation_layers":[],"row_limit":10000,"series_columns":["flow_label","node_label"],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"is_timeseries":true,"time_offsets":[],"post_processing":[{"operation":"pivot","options":{"index":["__timestamp"],"columns":["flow_label","node_label"],"aggregates":{"COUNT(context_id)":{"operator":"mean"}},"drop_missing_columns":false}},{"operation":"rename","options":{"columns":{"COUNT(context_id)":null},"level":0,"inplace":true}},{"operation":"contribution","options":{"orientation":"column"}},{"operation":"flatten"}]}],"form_data":{"datasource":"2__table","viz_type":"echarts_timeseries_bar","slice_id":4,"granularity_sqla":"start_time","time_grain_sqla":null,"time_range":"No
+ filter","metrics":[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_9yefk3wj2g_5wbp61n0pyr","sqlExpression":null}],"groupby":["flow_label","node_label"],"contributionMode":"column","adhoc_filters":[],"order_desc":true,"row_limit":10000,"truncate_metric":true,"show_empty_columns":true,"comparison_type":"values","annotation_layers":[],"forecastPeriods":10,"forecastInterval":0.8,"orientation":"vertical","x_axis_title":"Datetime","x_axis_title_margin":30,"y_axis_title":"Node
+ visit ratio","y_axis_title_margin":50,"y_axis_title_position":"Left","color_scheme":"supersetColors","stack":true,"only_total":true,"zoomable":true,"show_legend":true,"legendType":"scroll","legendOrientation":"top","x_axis_time_format":"smart_date","xAxisLabelRotation":45,"y_axis_format":"SMART_NUMBER","logAxis":false,"minorSplitLine":false,"truncateYAxis":false,"y_axis_bounds":[null,null],"rich_tooltip":true,"tooltipSortByMetric":true,"tooltipTimeFormat":"smart_date","extra_form_data":{},"dashboards":[1],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
+cache_timeout: null
+uuid: 6fafe59c-0fec-4cd8-a8b3-c0bfaffb2135
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/charts/Node_visits_cloud_5.yaml b/dff/config/superset_dashboard/charts/Node_visits_cloud_5.yaml
new file mode 100644
index 000000000..334bc85c7
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Node_visits_cloud_5.yaml
@@ -0,0 +1,48 @@
+slice_name: Node visits [cloud]
+description: null
+certified_by: null
+certification_details: null
+viz_type: word_cloud
+params:
+ adhoc_filters: []
+ color_scheme: supersetColors
+ datasource: 1__table
+ extra_form_data: {}
+ granularity_sqla: start_time
+ metric:
+ aggregate: COUNT
+ column:
+ advanced_data_type: null
+ certification_details: null
+ certified_by: null
+ column_name: context_id
+ description: null
+ expression: null
+ filterable: true
+ groupby: true
+ id: 1
+ is_certified: false
+ is_dttm: false
+ python_date_format: null
+ type: STRING
+ type_generic: 1
+ verbose_name: null
+ warning_markdown: null
+ expressionType: SIMPLE
+ hasCustomLabel: false
+ isNew: false
+ label: COUNT(context_id)
+ optionName: metric_mmbslhy6cnd_6zv1lh26whx
+ sqlExpression: null
+ rotation: flat
+ row_limit: 500
+ series: label
+ size_from: 10
+ size_to: 80
+ time_range: No filter
+ viz_type: word_cloud
+query_context: null
+cache_timeout: null
+uuid: b25b4292-ff21-4164-98ac-b1cba95e2994
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/charts/Node_visits_ratio_6.yaml b/dff/config/superset_dashboard/charts/Node_visits_ratio_6.yaml
new file mode 100644
index 000000000..f3d71f8e9
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Node_visits_ratio_6.yaml
@@ -0,0 +1,65 @@
+slice_name: Node visits [ratio]
+description: null
+certified_by: null
+certification_details: null
+viz_type: pie
+params:
+ adhoc_filters: []
+ color_scheme: supersetColors
+ datasource: 1__table
+ date_format: smart_date
+ donut: true
+ extra_form_data: {}
+ granularity_sqla: start_time
+ groupby:
+ - flow_label
+ - node_label
+ innerRadius: 36
+ label_line: false
+ label_type: key
+ labels_outside: true
+ legendMargin: 100
+ legendOrientation: top
+ legendType: plain
+ metric:
+ aggregate: COUNT
+ column:
+ advanced_data_type: null
+ certification_details: null
+ certified_by: null
+ column_name: context_id
+ description: null
+ expression: null
+ filterable: true
+ groupby: true
+ id: 1
+ is_certified: false
+ is_dttm: false
+ python_date_format: null
+ type: STRING
+ type_generic: 1
+ verbose_name: null
+ warning_markdown: null
+ expressionType: SIMPLE
+ hasCustomLabel: false
+ isNew: false
+ label: COUNT(context_id)
+ optionName: metric_lk827d91wws_mq94n624y6
+ sqlExpression: null
+ number_format: SMART_NUMBER
+ outerRadius: 70
+ row_limit: 100
+ show_labels: true
+ show_labels_threshold: 5
+ show_legend: true
+ show_total: true
+ sort_by_metric: true
+ time_range: No filter
+ viz_type: pie
+query_context: '{"datasource":{"id":2,"type":"table"},"force":false,"queries":[{"time_range":"No
+ filter","granularity":"start_time","filters":[],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":["flow_label","node_label"],"metrics":[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_lk827d91wws_mq94n624y6","sqlExpression":null}],"orderby":[[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_lk827d91wws_mq94n624y6","sqlExpression":null},false]],"annotation_layers":[],"row_limit":100,"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{}}],"form_data":{"adhoc_filters":[],"color_scheme":"supersetColors","datasource":"2__table","date_format":"smart_date","donut":true,"extra_form_data":{},"granularity_sqla":"start_time","groupby":["flow_label","node_label"],"innerRadius":36,"label_line":false,"label_type":"key","labels_outside":true,"legendMargin":100,"legendOrientation":"top","legendType":"plain","metric":{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_gene
ric":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_lk827d91wws_mq94n624y6","sqlExpression":null},"number_format":"SMART_NUMBER","outerRadius":70,"row_limit":100,"show_labels":true,"show_labels_threshold":5,"show_legend":true,"show_total":true,"slice_id":6,"sort_by_metric":true,"time_range":"No
+ filter","viz_type":"pie","force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
+cache_timeout: null
+uuid: f9fb7893-3533-4519-bbc4-1f4853f380e1
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/charts/Node_visits_sunburst_7.yaml b/dff/config/superset_dashboard/charts/Node_visits_sunburst_7.yaml
new file mode 100644
index 000000000..b3293a635
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Node_visits_sunburst_7.yaml
@@ -0,0 +1,69 @@
+slice_name: Node visits [sunburst]
+description: null
+certified_by: null
+certification_details: null
+viz_type: sunburst
+params:
+ adhoc_filters:
+ - clause: WHERE
+ comparator: null
+ expressionType: SIMPLE
+ filterOptionName: filter_82x0yx6fkpv_6h8a2190nmh
+ isExtra: false
+ isNew: false
+ operator: IS NOT NULL
+ operatorId: IS_NOT_NULL
+ sqlExpression: null
+ subject: flow_label
+ - clause: WHERE
+ comparator: null
+ expressionType: SIMPLE
+ filterOptionName: filter_653tn7jqmox_x65gwtz29gc
+ isExtra: false
+ isNew: false
+ operator: IS NOT NULL
+ operatorId: IS_NOT_NULL
+ sqlExpression: null
+ subject: node_label
+ color_scheme: supersetColors
+ datasource: 3__table
+ extra_form_data: {}
+ granularity_sqla: start_time
+ groupby:
+ - flow_label
+ - node_label
+ linear_color_scheme: superset_seq_1
+ metric:
+ aggregate: COUNT
+ column:
+ advanced_data_type: null
+ certification_details: null
+ certified_by: null
+ column_name: context_id
+ description: null
+ expression: null
+ filterable: true
+ groupby: true
+ id: 1
+ is_certified: false
+ is_dttm: false
+ python_date_format: null
+ type: STRING
+ type_generic: 1
+ verbose_name: null
+ warning_markdown: null
+ expressionType: SIMPLE
+ hasCustomLabel: false
+ isNew: false
+ label: COUNT(context_id)
+ optionName: metric_7mo9cnw40ph_rzxhx01jm0c
+ sqlExpression: null
+ row_limit: 10000
+ slice_id: 8
+ time_range: No filter
+ viz_type: sunburst
+query_context: null
+cache_timeout: null
+uuid: e955f48c-824c-423c-a11c-5b5dca162927
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/charts/Service_load_max_dialogue_length_8.yaml b/dff/config/superset_dashboard/charts/Service_load_max_dialogue_length_8.yaml
new file mode 100644
index 000000000..1e087769a
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Service_load_max_dialogue_length_8.yaml
@@ -0,0 +1,83 @@
+slice_name: Service load [max dialogue length]
+description: null
+certified_by: null
+certification_details: null
+viz_type: echarts_timeseries_step
+params:
+ datasource: 2__table
+ viz_type: echarts_timeseries_step
+ slice_id: 8
+ granularity_sqla: start_time
+ time_grain_sqla: null
+ time_range: No filter
+ metrics:
+ - aggregate: null
+ column: null
+ datasourceWarning: false
+ expressionType: SQL
+ hasCustomLabel: false
+ label: AVG(CAST(request_id AS Int16))
+ optionName: metric_8h0zfurdcy_tzo5q0tuczi
+ sqlExpression: AVG(CAST(request_id AS Int16))
+ - aggregate: null
+ column: null
+ datasourceWarning: false
+ expressionType: SQL
+ hasCustomLabel: false
+ label: MAX(CAST(request_id AS Int16))
+ optionName: metric_wz5zx1kqqc_u42eslame2
+ sqlExpression: MAX(CAST(request_id AS Int16))
+ groupby: []
+ adhoc_filters: []
+ order_desc: true
+ row_limit: 10000
+ truncate_metric: true
+ show_empty_columns: true
+ comparison_type: values
+ annotation_layers: []
+ forecastPeriods: 10
+ forecastInterval: 0.8
+ x_axis_title_margin: 15
+ y_axis_title: AVG/MAX dialogue length
+ y_axis_title_margin: 50
+ y_axis_title_position: Left
+ color_scheme: supersetColors
+ seriesType: start
+ only_total: true
+ opacity: 0.2
+ markerSize: 6
+ zoomable: true
+ show_legend: true
+ legendType: scroll
+ legendOrientation: top
+ x_axis_time_format: smart_date
+ rich_tooltip: true
+ tooltipSortByMetric: false
+ tooltipTimeFormat: smart_date
+ y_axis_format: SMART_NUMBER
+ logAxis: false
+ y_axis_bounds:
+ - null
+ - null
+ extra_form_data: {}
+ dashboards:
+ - 1
+query_context: '{"datasource":{"id":2,"type":"table"},"force":false,"queries":[{"time_range":"No
+ filter","granularity":"start_time","filters":[],"extras":{"time_grain_sqla":null,"having":"","where":""},"applied_time_extras":{},"columns":[],"metrics":[{"aggregate":null,"column":null,"datasourceWarning":false,"expressionType":"SQL","hasCustomLabel":false,"label":"AVG(CAST(request_id
+ AS Int16))","optionName":"metric_8h0zfurdcy_tzo5q0tuczi","sqlExpression":"AVG(CAST(request_id
+ AS Int16))"},{"aggregate":null,"column":null,"datasourceWarning":false,"expressionType":"SQL","hasCustomLabel":false,"label":"MAX(CAST(request_id
+ AS Int16))","optionName":"metric_wz5zx1kqqc_u42eslame2","sqlExpression":"MAX(CAST(request_id
+ AS Int16))"}],"orderby":[[{"aggregate":null,"column":null,"datasourceWarning":false,"expressionType":"SQL","hasCustomLabel":false,"label":"AVG(CAST(request_id
+ AS Int16))","optionName":"metric_8h0zfurdcy_tzo5q0tuczi","sqlExpression":"AVG(CAST(request_id
+ AS Int16))"},false]],"annotation_layers":[],"row_limit":10000,"series_columns":[],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"is_timeseries":true,"time_offsets":[],"post_processing":[{"operation":"pivot","options":{"index":["__timestamp"],"columns":[],"aggregates":{"AVG(CAST(request_id
+ AS Int16))":{"operator":"mean"},"MAX(CAST(request_id AS Int16))":{"operator":"mean"}},"drop_missing_columns":false}},{"operation":"flatten"}]}],"form_data":{"datasource":"2__table","viz_type":"echarts_timeseries_step","slice_id":8,"granularity_sqla":"start_time","time_grain_sqla":null,"time_range":"No
+ filter","metrics":[{"aggregate":null,"column":null,"datasourceWarning":false,"expressionType":"SQL","hasCustomLabel":false,"label":"AVG(CAST(request_id
+ AS Int16))","optionName":"metric_8h0zfurdcy_tzo5q0tuczi","sqlExpression":"AVG(CAST(request_id
+ AS Int16))"},{"aggregate":null,"column":null,"datasourceWarning":false,"expressionType":"SQL","hasCustomLabel":false,"label":"MAX(CAST(request_id
+ AS Int16))","optionName":"metric_wz5zx1kqqc_u42eslame2","sqlExpression":"MAX(CAST(request_id
+ AS Int16))"}],"groupby":[],"adhoc_filters":[],"order_desc":true,"row_limit":10000,"truncate_metric":true,"show_empty_columns":true,"comparison_type":"values","annotation_layers":[],"forecastPeriods":10,"forecastInterval":0.8,"x_axis_title_margin":15,"y_axis_title":"AVG/MAX
+ dialogue length","y_axis_title_margin":50,"y_axis_title_position":"Left","color_scheme":"supersetColors","seriesType":"start","only_total":true,"opacity":0.2,"markerSize":6,"zoomable":true,"show_legend":true,"legendType":"scroll","legendOrientation":"top","x_axis_time_format":"smart_date","rich_tooltip":true,"tooltipSortByMetric":false,"tooltipTimeFormat":"smart_date","y_axis_format":"SMART_NUMBER","logAxis":false,"y_axis_bounds":[null,null],"extra_form_data":{},"dashboards":[1],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
+cache_timeout: null
+uuid: 276c3aa7-89bc-49ff-91b6-6232ae35c854
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/charts/Service_load_users_9.yaml b/dff/config/superset_dashboard/charts/Service_load_users_9.yaml
new file mode 100644
index 000000000..126af9af3
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Service_load_users_9.yaml
@@ -0,0 +1,62 @@
+slice_name: Service load [users]
+description: null
+certified_by: null
+certification_details: null
+viz_type: big_number
+params:
+ datasource: 2__table
+ viz_type: big_number
+ slice_id: 4
+ granularity_sqla: start_time
+ time_grain_sqla: PT5M
+ time_range: No filter
+ metric:
+ expressionType: SIMPLE
+ column:
+ advanced_data_type: null
+ certification_details: null
+ certified_by: null
+ column_name: context_id
+ description: null
+ expression: null
+ filterable: true
+ groupby: true
+ id: 12
+ is_certified: false
+ is_dttm: false
+ python_date_format: null
+ type: String
+ type_generic: 1
+ verbose_name: null
+ warning_markdown: null
+ aggregate: COUNT_DISTINCT
+ sqlExpression: null
+ datasourceWarning: false
+ hasCustomLabel: false
+ label: COUNT_DISTINCT(context_id)
+ optionName: metric_5554up52jgr_laajjk3aaim
+ adhoc_filters: []
+ show_timestamp: false
+ show_trend_line: true
+ start_y_axis_at_zero: true
+ color_picker:
+ a: 1
+ b: 135
+ g: 122
+ r: 0
+ header_font_size: 0.3
+ subheader_font_size: 0.15
+ y_axis_format: SMART_NUMBER
+ time_format: smart_date
+ force_timestamp_formatting: false
+ rolling_type: None
+ extra_form_data: {}
+ dashboards:
+ - 1
+query_context: '{"datasource":{"id":2,"type":"table"},"force":false,"queries":[{"time_range":"No
+ filter","granularity":"start_time","filters":[],"extras":{"time_grain_sqla":"PT5M","having":"","where":""},"applied_time_extras":{},"columns":[],"metrics":[{"expressionType":"SIMPLE","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":12,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"String","type_generic":1,"verbose_name":null,"warning_markdown":null},"aggregate":"COUNT_DISTINCT","sqlExpression":null,"datasourceWarning":false,"hasCustomLabel":false,"label":"COUNT_DISTINCT(context_id)","optionName":"metric_5554up52jgr_laajjk3aaim"}],"annotation_layers":[],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"is_timeseries":true,"post_processing":[{"operation":"pivot","options":{"index":["__timestamp"],"columns":[],"aggregates":{"COUNT_DISTINCT(context_id)":{"operator":"mean"}},"drop_missing_columns":true}},{"operation":"flatten"}]}],"form_data":{"datasource":"2__table","viz_type":"big_number","slice_id":4,"granularity_sqla":"start_time","time_grain_sqla":"PT5M","time_range":"No
+ filter","metric":{"expressionType":"SIMPLE","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":12,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"String","type_generic":1,"verbose_name":null,"warning_markdown":null},"aggregate":"COUNT_DISTINCT","sqlExpression":null,"datasourceWarning":false,"hasCustomLabel":false,"label":"COUNT_DISTINCT(context_id)","optionName":"metric_5554up52jgr_laajjk3aaim"},"adhoc_filters":[],"show_timestamp":false,"show_trend_line":true,"start_y_axis_at_zero":true,"color_picker":{"a":1,"b":135,"g":122,"r":0},"header_font_size":0.3,"subheader_font_size":0.15,"y_axis_format":"SMART_NUMBER","time_format":"smart_date","force_timestamp_formatting":false,"rolling_type":"None","extra_form_data":{},"dashboards":[1],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
+cache_timeout: null
+uuid: b5d43314-514c-464e-9fcc-f897e3ae0963
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/charts/Table_10.yaml b/dff/config/superset_dashboard/charts/Table_10.yaml
new file mode 100644
index 000000000..b3f9f636c
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Table_10.yaml
@@ -0,0 +1,34 @@
+slice_name: Table
+description: null
+certified_by: null
+certification_details: null
+viz_type: table
+params:
+ adhoc_filters: []
+ all_columns: []
+ color_pn: true
+ datasource: 3__table
+ extra_form_data: {}
+ granularity_sqla: start_time
+ groupby:
+ - context_id
+ - start_time
+ - data_key
+ - data
+ order_by_cols: []
+ order_desc: false
+ percent_metrics: []
+ query_mode: aggregate
+ row_limit: 10000
+ server_page_length: 10
+ show_cell_bars: true
+ slice_id: 14
+ table_timestamp_format: smart_date
+ time_grain_sqla: null
+ time_range: No filter
+ viz_type: table
+query_context: null
+cache_timeout: null
+uuid: 38d353dc-c0ef-41a0-97c7-3dcbebab9e02
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/charts/Terminal_labels_11.yaml b/dff/config/superset_dashboard/charts/Terminal_labels_11.yaml
new file mode 100644
index 000000000..76daa1499
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Terminal_labels_11.yaml
@@ -0,0 +1,56 @@
+slice_name: Terminal labels
+description: null
+certified_by: null
+certification_details: null
+viz_type: echarts_timeseries_bar
+params:
+ datasource: 1__table
+ viz_type: echarts_timeseries_bar
+ slice_id: 11
+ granularity_sqla: start_time
+ time_grain_sqla: null
+ time_range: No filter
+ metrics:
+ - count
+ groupby:
+ - flow_label
+ - node_label
+ contributionMode: null
+ adhoc_filters: []
+ order_desc: true
+ row_limit: 10000
+ truncate_metric: true
+ show_empty_columns: true
+ comparison_type: values
+ annotation_layers: []
+ forecastPeriods: 10
+ forecastInterval: 0.8
+ orientation: vertical
+ x_axis_title_margin: 15
+ y_axis_title_margin: 15
+ y_axis_title_position: Left
+ color_scheme: supersetColors
+ show_value: false
+ stack: false
+ only_total: true
+ zoomable: true
+ show_legend: true
+ legendType: scroll
+ legendOrientation: top
+ x_axis_time_format: smart_date
+ y_axis_format: SMART_NUMBER
+ y_axis_bounds:
+ - null
+ - null
+ rich_tooltip: true
+ tooltipTimeFormat: smart_date
+ extra_form_data: {}
+ dashboards:
+ - 1
+query_context: '{"datasource":{"id":1,"type":"table"},"force":false,"queries":[{"time_range":"No
+ filter","granularity":"start_time","filters":[],"extras":{"time_grain_sqla":null,"having":"","where":""},"applied_time_extras":{},"columns":["flow_label","node_label"],"metrics":["count"],"orderby":[["count",false]],"annotation_layers":[],"row_limit":10000,"series_columns":["flow_label","node_label"],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"is_timeseries":true,"time_offsets":[],"post_processing":[{"operation":"pivot","options":{"index":["__timestamp"],"columns":["flow_label","node_label"],"aggregates":{"count":{"operator":"mean"}},"drop_missing_columns":false}},{"operation":"rename","options":{"columns":{"count":null},"level":0,"inplace":true}},{"operation":"flatten"}]}],"form_data":{"datasource":"1__table","viz_type":"echarts_timeseries_bar","slice_id":11,"granularity_sqla":"start_time","time_grain_sqla":null,"time_range":"No
+ filter","metrics":["count"],"groupby":["flow_label","node_label"],"contributionMode":null,"adhoc_filters":[],"order_desc":true,"row_limit":10000,"truncate_metric":true,"show_empty_columns":true,"comparison_type":"values","annotation_layers":[],"forecastPeriods":10,"forecastInterval":0.8,"orientation":"vertical","x_axis_title_margin":15,"y_axis_title_margin":15,"y_axis_title_position":"Left","color_scheme":"supersetColors","show_value":false,"stack":false,"only_total":true,"zoomable":true,"show_legend":true,"legendType":"scroll","legendOrientation":"top","x_axis_time_format":"smart_date","y_axis_format":"SMART_NUMBER","y_axis_bounds":[null,null],"rich_tooltip":true,"tooltipTimeFormat":"smart_date","extra_form_data":{},"dashboards":[1],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
+cache_timeout: null
+uuid: cf066e41-a9e8-4f54-a875-ebf4da350b59
+version: 1.0.0
+dataset_uuid: d7f6546e-1e3a-479d-8531-05b5e73e5c05
diff --git a/dff/config/superset_dashboard/charts/Transition_counts_12.yaml b/dff/config/superset_dashboard/charts/Transition_counts_12.yaml
new file mode 100644
index 000000000..f44c72191
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Transition_counts_12.yaml
@@ -0,0 +1,63 @@
+slice_name: Transition counts
+description: null
+certified_by: null
+certification_details: null
+viz_type: dist_bar
+params:
+ adhoc_filters: []
+ bar_stacked: true
+ bottom_margin: auto
+ color_scheme: supersetColors
+ columns:
+ - flow_label
+ datasource: 1__table
+ extra_form_data: {}
+ granularity_sqla: start_time
+ groupby:
+ - prev_label
+ - label
+ metrics:
+ - aggregate: COUNT_DISTINCT
+ column:
+ advanced_data_type: null
+ certification_details: null
+ certified_by: null
+ column_name: context_id
+ description: null
+ expression: null
+ filterable: true
+ groupby: true
+ id: 1
+ is_certified: false
+ is_dttm: false
+ python_date_format: null
+ type: STRING
+ type_generic: 1
+ verbose_name: null
+ warning_markdown: null
+ expressionType: SIMPLE
+ hasCustomLabel: false
+ isNew: false
+ label: COUNT_DISTINCT(context_id)
+ optionName: metric_tc3lb0a1pff_dyroibp08h7
+ sqlExpression: null
+ order_desc: true
+ rich_tooltip: true
+ row_limit: 10000
+ show_legend: true
+ slice_id: 6
+ time_range: No filter
+ viz_type: dist_bar
+ x_axis_label: Transitions
+ x_ticks_layout: auto
+ y_axis_bounds:
+ - null
+ - null
+ y_axis_format: SMART_NUMBER
+ y_axis_label: Counts
+ y_axis_showminmax: false
+query_context: null
+cache_timeout: null
+uuid: 9fcc7cc1-9257-4c0d-b377-b3a60a8bf3df
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/charts/Transition_layout_13.yaml b/dff/config/superset_dashboard/charts/Transition_layout_13.yaml
new file mode 100644
index 000000000..5999b30ed
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Transition_layout_13.yaml
@@ -0,0 +1,62 @@
+slice_name: Transition layout
+description: null
+certified_by: null
+certification_details: null
+viz_type: graph_chart
+params:
+ adhoc_filters: []
+ baseEdgeWidth: 3
+ baseNodeSize: 20
+ color_scheme: supersetColors
+ datasource: 1__table
+ draggable: false
+ edgeLength: 400
+ edgeSymbol: none,arrow
+ extra_form_data: {}
+ friction: 0.2
+ granularity_sqla: start_time
+ gravity: 0.3
+ layout: force
+ legendOrientation: top
+ legendType: scroll
+ metric:
+ aggregate: COUNT
+ column:
+ advanced_data_type: null
+ certification_details: null
+ certified_by: null
+ column_name: context_id
+ description: null
+ expression: null
+ filterable: true
+ groupby: true
+ id: 1
+ is_certified: false
+ is_dttm: false
+ python_date_format: null
+ type: STRING
+ type_generic: 1
+ verbose_name: null
+ warning_markdown: null
+ expressionType: SIMPLE
+ hasCustomLabel: false
+ isNew: false
+ label: COUNT(context_id)
+ optionName: metric_qxsyaujh63_4b75kyx6b5g
+ sqlExpression: null
+ repulsion: 1000
+ roam: scale
+ row_limit: 10000
+ selectedMode: single
+ show_legend: true
+ source: prev_label
+ source_category: flow_label
+ target: label
+ target_category: flow_label
+ time_range: No filter
+ viz_type: graph_chart
+query_context: null
+cache_timeout: null
+uuid: 801e8c66-f693-46e3-a9a5-7b2b0b08a4a7
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/charts/Transition_ratio_chord_14.yaml b/dff/config/superset_dashboard/charts/Transition_ratio_chord_14.yaml
new file mode 100644
index 000000000..e9d1f8723
--- /dev/null
+++ b/dff/config/superset_dashboard/charts/Transition_ratio_chord_14.yaml
@@ -0,0 +1,48 @@
+slice_name: Transition ratio [chord]
+description: null
+certified_by: null
+certification_details: null
+viz_type: chord
+params:
+ adhoc_filters: []
+ color_scheme: bnbColors
+ columns: prev_label
+ datasource: 1__table
+ extra_form_data: {}
+ granularity_sqla: start_time
+ groupby: label
+ metric:
+ aggregate: COUNT_DISTINCT
+ column:
+ advanced_data_type: null
+ certification_details: null
+ certified_by: null
+ column_name: context_id
+ description: null
+ expression: null
+ filterable: true
+ groupby: true
+ id: 1
+ is_certified: false
+ is_dttm: false
+ python_date_format: null
+ type: STRING
+ type_generic: 1
+ verbose_name: null
+ warning_markdown: null
+ expressionType: SIMPLE
+ hasCustomLabel: false
+ isNew: false
+ label: COUNT_DISTINCT(context_id)
+ optionName: metric_97lz5hqft8j_aqofzpt1ma5
+ sqlExpression: null
+ row_limit: 10000
+ slice_id: 13
+ time_range: No filter
+ viz_type: chord
+ y_axis_format: ~g
+query_context: null
+cache_timeout: null
+uuid: 388d2359-8d13-4795-8dc9-1cb5dfa92ee1
+version: 1.0.0
+dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
diff --git a/dff/config/superset_dashboard/dashboards/DFF_Stats_1.yaml b/dff/config/superset_dashboard/dashboards/DFF_Stats_1.yaml
new file mode 100644
index 000000000..0523b8ae1
--- /dev/null
+++ b/dff/config/superset_dashboard/dashboards/DFF_Stats_1.yaml
@@ -0,0 +1,555 @@
+dashboard_title: DFF Stats
+description: null
+css: ''
+slug: dff-stats
+uuid: 68bce374-99bc-4890-b8c2-cb172409b894
+position:
+ CHART-Af3zvLsiKV:
+ children: []
+ id: CHART-Af3zvLsiKV
+ meta:
+ chartId: 4
+ height: 66
+ sliceName: Node visit ratio monitor
+ uuid: 6fafe59c-0fec-4cd8-a8b3-c0bfaffb2135
+ width: 12
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-6zE8noCIsx
+ - ROW-7g6_n72hZ
+ type: CHART
+ CHART-CuCYDOlGEu:
+ children: []
+ id: CHART-CuCYDOlGEu
+ meta:
+ chartId: 10
+ height: 50
+ sliceName: Table
+ sliceNameOverride: Stats table
+ uuid: 38d353dc-c0ef-41a0-97c7-3dcbebab9e02
+ width: 12
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-JCU6rANFP
+ - ROW-EaYAQjv4W
+ type: CHART
+ CHART-DxY0c3RnIv:
+ children: []
+ id: CHART-DxY0c3RnIv
+ meta:
+ chartId: 3
+ height: 61
+ sliceName: Node counts
+ uuid: 0c47c7b5-f500-46cb-97e3-9ebb637f0c8a
+ width: 12
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-VUknWnOAy
+ - ROW-PvToekFjO
+ type: CHART
+ CHART-explore-10-1:
+ children: []
+ id: CHART-explore-10-1
+ meta:
+ chartId: 13
+ height: 73
+ sliceName: Transition layout
+ sliceNameOverride: Dialogue layout
+ uuid: 801e8c66-f693-46e3-a9a5-7b2b0b08a4a7
+ width: 6
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-JCU6rANFP
+ - ROW-Ccs1FbcI-
+ type: CHART
+ CHART-explore-11-1:
+ children: []
+ id: CHART-explore-11-1
+ meta:
+ chartId: 5
+ height: 56
+ sliceName: Node visits [cloud]
+ uuid: b25b4292-ff21-4164-98ac-b1cba95e2994
+ width: 8
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-Gw0Ffh1lG
+ - ROW-rR4J0eAhh
+ type: CHART
+ CHART-explore-13-1:
+ children: []
+ id: CHART-explore-13-1
+ meta:
+ chartId: 14
+ height: 74
+ sliceName: Transition ratio [chord]
+ uuid: 388d2359-8d13-4795-8dc9-1cb5dfa92ee1
+ width: 6
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-JCU6rANFP
+ - ROW-Ccs1FbcI-
+ type: CHART
+ CHART-explore-14-1:
+ children: []
+ id: CHART-explore-14-1
+ meta:
+ chartId: 7
+ height: 105
+ sliceName: Node visits [sunburst]
+ uuid: e955f48c-824c-423c-a11c-5b5dca162927
+ width: 6
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-Gw0Ffh1lG
+ - ROW-ZcN9G9RL5
+ type: CHART
+ CHART-explore-20-1:
+ children: []
+ id: CHART-explore-20-1
+ meta:
+ chartId: 2
+ height: 50
+ sliceName: Node Visits
+ uuid: 44f4ab9d-5072-4926-a6ed-8615fb81b3d0
+ width: 12
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-6zE8noCIsx
+ - ROW-ksepbeQu6
+ type: CHART
+ CHART-explore-21-1:
+ children: []
+ id: CHART-explore-21-1
+ meta:
+ chartId: 1
+ height: 60
+ sliceName: Flow visit ratio monitor
+ uuid: ba02528b-184b-4304-b027-f2b7d9011ab0
+ width: 12
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-6zE8noCIsx
+ - ROW-ZTVWOu2o0
+ type: CHART
+ CHART-explore-22-1:
+ children: []
+ id: CHART-explore-22-1
+ meta:
+ chartId: 11
+ height: 61
+ sliceName: Terminal labels
+ sliceNameOverride: Terminal labels monitor
+ uuid: cf066e41-a9e8-4f54-a875-ebf4da350b59
+ width: 12
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-6zE8noCIsx
+ - ROW-LhKWfqM7V
+ type: CHART
+ CHART-explore-9-1:
+ children: []
+ id: CHART-explore-9-1
+ meta:
+ chartId: 6
+ height: 105
+ sliceName: Node visits [ratio]
+ uuid: f9fb7893-3533-4519-bbc4-1f4853f380e1
+ width: 6
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-Gw0Ffh1lG
+ - ROW-ZcN9G9RL5
+ type: CHART
+ CHART-mYC2udeF3a:
+ children: []
+ id: CHART-mYC2udeF3a
+ meta:
+ chartId: 9
+ height: 50
+ sliceName: Service load [users]
+ uuid: b5d43314-514c-464e-9fcc-f897e3ae0963
+ width: 12
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-6zE8noCIsx
+ - ROW-Ae7v1prsp2
+ type: CHART
+ CHART-wci7CHiza6:
+ children: []
+ id: CHART-wci7CHiza6
+ meta:
+ chartId: 12
+ height: 66
+ sliceName: Transition counts
+ uuid: 9fcc7cc1-9257-4c0d-b377-b3a60a8bf3df
+ width: 12
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-VUknWnOAy
+ - ROW-TRNUnY9X_Y
+ type: CHART
+ COLUMN-JRaDi96UVP:
+ children: []
+ id: COLUMN-JRaDi96UVP
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ width: 2
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-Gw0Ffh1lG
+ - ROW-rR4J0eAhh
+ type: COLUMN
+ COLUMN-WysgPuf0P1:
+ children: []
+ id: COLUMN-WysgPuf0P1
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ width: 2
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-Gw0Ffh1lG
+ - ROW-rR4J0eAhh
+ type: COLUMN
+ DASHBOARD_VERSION_KEY: v2
+ GRID_ID:
+ children:
+ - HEADER-nYVwogYInk
+ - TABS-Xoi5oUBxZI
+ id: GRID_ID
+ parents:
+ - ROOT_ID
+ type: GRID
+ HEADER-nYVwogYInk:
+ children: []
+ id: HEADER-nYVwogYInk
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ headerSize: LARGE_HEADER
+ text: DFF Stats
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ type: HEADER
+ HEADER_ID:
+ id: HEADER_ID
+ meta:
+ text: DFF Stats
+ type: HEADER
+ ROOT_ID:
+ children:
+ - GRID_ID
+ id: ROOT_ID
+ type: ROOT
+ ROW-7g6_n72hZ:
+ children:
+ - CHART-Af3zvLsiKV
+ id: ROW-7g6_n72hZ
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-6zE8noCIsx
+ type: ROW
+ ROW-Ae7v1prsp2:
+ children:
+ - CHART-mYC2udeF3a
+ id: ROW-Ae7v1prsp2
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-6zE8noCIsx
+ type: ROW
+ ROW-Ccs1FbcI-:
+ children:
+ - CHART-explore-10-1
+ - CHART-explore-13-1
+ id: ROW-Ccs1FbcI-
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-JCU6rANFP
+ type: ROW
+ ROW-EaYAQjv4W:
+ children:
+ - CHART-CuCYDOlGEu
+ id: ROW-EaYAQjv4W
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-JCU6rANFP
+ type: ROW
+ ROW-LhKWfqM7V:
+ children:
+ - CHART-explore-22-1
+ id: ROW-LhKWfqM7V
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-6zE8noCIsx
+ type: ROW
+ ROW-PvToekFjO:
+ children:
+ - CHART-DxY0c3RnIv
+ id: ROW-PvToekFjO
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-VUknWnOAy
+ type: ROW
+ ROW-TRNUnY9X_Y:
+ children:
+ - CHART-wci7CHiza6
+ id: ROW-TRNUnY9X_Y
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-VUknWnOAy
+ type: ROW
+ ROW-ZTVWOu2o0:
+ children:
+ - CHART-explore-21-1
+ id: ROW-ZTVWOu2o0
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-6zE8noCIsx
+ type: ROW
+ ROW-ZcN9G9RL5:
+ children:
+ - CHART-explore-9-1
+ - CHART-explore-14-1
+ id: ROW-ZcN9G9RL5
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-Gw0Ffh1lG
+ type: ROW
+ ROW-hfJk2ddHi:
+ children:
+ - CHART-explore-20-1
+ id: ROW-hfJk2ddHi
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-VUknWnOAy
+ type: ROW
+ ROW-ksepbeQu6:
+ children:
+ - CHART-explore-20-1
+ id: ROW-ksepbeQu6
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-6zE8noCIsx
+ type: ROW
+ ROW-rR4J0eAhh:
+ children:
+ - COLUMN-JRaDi96UVP
+ - CHART-explore-11-1
+ - COLUMN-WysgPuf0P1
+ id: ROW-rR4J0eAhh
+ meta:
+ background: BACKGROUND_TRANSPARENT
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ - TAB-Gw0Ffh1lG
+ type: ROW
+ TAB-6zE8noCIsx:
+ children:
+ - ROW-Ae7v1prsp2
+ - ROW-ksepbeQu6
+ - ROW-ZTVWOu2o0
+ - ROW-7g6_n72hZ
+ - ROW-LhKWfqM7V
+ id: TAB-6zE8noCIsx
+ meta:
+ defaultText: Tab title
+ placeholder: Tab title
+ text: Service stats
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ type: TAB
+ TAB-Gw0Ffh1lG:
+ children:
+ - ROW-ZcN9G9RL5
+ - ROW-rR4J0eAhh
+ id: TAB-Gw0Ffh1lG
+ meta:
+ defaultText: Tab title
+ placeholder: Tab title
+ text: General stats
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ type: TAB
+ TAB-JCU6rANFP:
+ children:
+ - ROW-EaYAQjv4W
+ - ROW-Ccs1FbcI-
+ id: TAB-JCU6rANFP
+ meta:
+ defaultText: Tab title
+ placeholder: Tab title
+ text: Overview
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ type: TAB
+ TAB-VUknWnOAy:
+ children:
+ - ROW-PvToekFjO
+ - ROW-hfJk2ddHi
+ - ROW-TRNUnY9X_Y
+ id: TAB-VUknWnOAy
+ meta:
+ defaultText: Tab title
+ placeholder: Tab title
+ text: Additional stats
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ - TABS-Xoi5oUBxZI
+ type: TAB
+ TABS-Xoi5oUBxZI:
+ children:
+ - TAB-JCU6rANFP
+ - TAB-Gw0Ffh1lG
+ - TAB-VUknWnOAy
+ - TAB-6zE8noCIsx
+ id: TABS-Xoi5oUBxZI
+ meta: {}
+ parents:
+ - ROOT_ID
+ - GRID_ID
+ type: TABS
+metadata:
+ color_scheme: echarts4Colors
+ label_colors: {}
+ shared_label_colors:
+ 'greeting_flow: node1': '#c23531'
+ 'greeting_flow: node2': '#2f4554'
+ 'greeting_flow: node3': '#61a0a8'
+ 'greeting_flow: node4': '#d48265'
+ greeting_flow, node1: '#c23531'
+ greeting_flow, node2: '#2f4554'
+ greeting_flow, node3: '#61a0a8'
+ greeting_flow, node4: '#d48265'
+ greeting_flow: '#c23531'
+ AVG(CAST(request_id AS Int16)): '#c23531'
+ MAX(CAST(request_id AS Int16)): '#2f4554'
+ root: '#c23531'
+ node2: '#61a0a8'
+ node4: '#d48265'
+ node3: '#91c7ae'
+ node1: '#749f83'
+ ? ''
+ : '#c23531'
+ timed_refresh_immune_slices: []
+ expanded_slices: {}
+ refresh_frequency: 1800
+ show_native_filters: true
+ default_filters: '{}'
+ chart_configuration: {}
+ color_scheme_domain:
+ - '#c23531'
+ - '#2f4554'
+ - '#61a0a8'
+ - '#d48265'
+ - '#91c7ae'
+ - '#749f83'
+ - '#ca8622'
+ - '#bda29a'
+ - '#6e7074'
+ - '#546570'
+ - '#c4ccd3'
+ cross_filters_enabled: false
+ native_filter_configuration:
+ - id: NATIVE_FILTER-5vyW3SgU5
+ controlValues:
+ enableEmptyFilter: false
+ name: Time grain
+ filterType: filter_timegrain
+ targets:
+ - datasetUuid: fda98ab8-f550-45f1-9ded-0113f3e67260
+ defaultDataMask:
+ extraFormData: {}
+ filterState: {}
+ ownState: {}
+ cascadeParentIds: []
+ scope:
+ rootPath:
+ - TAB-6zE8noCIsx
+ excluded:
+ - 7
+ type: NATIVE_FILTER
+ description: ''
+version: 1.0.0
diff --git a/dff/config/superset_dashboard/databases/dff_database.yaml b/dff/config/superset_dashboard/databases/dff_database.yaml
new file mode 100644
index 000000000..b178a0f80
--- /dev/null
+++ b/dff/config/superset_dashboard/databases/dff_database.yaml
@@ -0,0 +1,13 @@
+database_name: dff_database
+sqlalchemy_uri: clickhousedb+connect://username:XXXXXXXXXX@clickhouse:8123/test
+cache_timeout: null
+expose_in_sqllab: true
+allow_run_async: false
+allow_ctas: false
+allow_cvas: false
+allow_dml: false
+allow_file_upload: false
+extra:
+ allows_virtual_table_explore: true
+uuid: ee32f76e-55e3-483a-935a-22d03072db23
+version: 1.0.0
diff --git a/dff/config/superset_dashboard/datasets/dff_database/dff_final_nodes.yaml b/dff/config/superset_dashboard/datasets/dff_database/dff_final_nodes.yaml
new file mode 100644
index 000000000..a07fdd800
--- /dev/null
+++ b/dff/config/superset_dashboard/datasets/dff_database/dff_final_nodes.yaml
@@ -0,0 +1,105 @@
+table_name: dff_final_nodes
+main_dttm_col: null
+description: null
+default_endpoint: null
+offset: 0
+cache_timeout: null
+schema: test
+sql: "\nWITH main AS (\n SELECT LogAttributes['context_id'] AS context_id,\n \
+ \ max(LogAttributes['request_id']) AS max_history\n FROM otel_logs\nGROUP BY\
+ \ context_id\n)\nSELECT DISTINCT LogAttributes['context_id'] AS context_id,\nLogAttributes['request_id']\
+ \ AS request_id,\notel_logs.Timestamp AS start_time,\nJSON_VALUE(otel_logs.Body,\
+ \ '$.label') AS label,\nJSON_VALUE(otel_logs.Body, '$.flow') AS flow_label,\nJSON_VALUE(otel_logs.Body,\
+ \ '$.node') AS node_label\nFROM otel_logs\nINNER JOIN main\nON context_id = main.context_id\n\
+ AND request_id = main.max_history\nINNER JOIN otel_traces\nON otel_logs.TraceId\
+ \ = otel_traces.TraceId\nWHERE otel_traces.SpanName = 'get_current_label'\n"
+params: null
+template_params: null
+filter_select_enabled: false
+fetch_values_predicate: null
+extra: null
+uuid: d7f6546e-1e3a-479d-8531-05b5e73e5c05
+metrics:
+- metric_name: count
+ verbose_name: null
+ metric_type: null
+ expression: count(*)
+ description: null
+ d3format: null
+ extra: {}
+ warning_text: null
+columns:
+- column_name: start_time
+ verbose_name: null
+ is_dttm: true
+ is_active: true
+ type: DateTime64(9)
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: node_label
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: String
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: flow_label
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: String
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: context_id
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: String
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: request_id
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: String
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: label
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: String
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+version: 1.0.0
+database_uuid: ee32f76e-55e3-483a-935a-22d03072db23
diff --git a/dff/config/superset_dashboard/datasets/dff_database/dff_node_stats.yaml b/dff/config/superset_dashboard/datasets/dff_database/dff_node_stats.yaml
new file mode 100644
index 000000000..5ddab7c69
--- /dev/null
+++ b/dff/config/superset_dashboard/datasets/dff_database/dff_node_stats.yaml
@@ -0,0 +1,144 @@
+table_name: dff_node_stats
+main_dttm_col: null
+description: null
+default_endpoint: null
+offset: 0
+cache_timeout: null
+schema: test
+sql: "\nWITH main AS (\n SELECT DISTINCT otel_logs.LogAttributes['context_id']\
+ \ as context_id,\n otel_logs.LogAttributes['request_id'] as request_id,\n \
+ \ otel_traces.Timestamp as start_time,\n otel_traces.SpanName as data_key,\n\
+ \ otel_logs.Body as data,\n JSON_VALUE(otel_logs.Body, '$.label') as label,\n\
+ \ JSON_VALUE(otel_logs.Body, '$.flow') as flow_label,\n JSON_VALUE(otel_logs.Body,\
+ \ '$.node') as node_label,\n otel_logs.TraceId as trace_id,\n otel_traces.TraceId\n\
+ FROM otel_logs, otel_traces\n WHERE otel_logs.TraceId = otel_traces.TraceId and\
+ \ otel_traces.SpanName = 'get_current_label'\n ORDER BY context_id, request_id\n\
+ ) SELECT context_id,\n request_id,\n start_time,\n data_key,\n data,\n\
+ \ label,\n neighbor(label, -1) as prev_label,\n flow_label,\n node_label\n\
+ FROM main\nWHERE label != ''\n"
+params: null
+template_params: null
+filter_select_enabled: false
+fetch_values_predicate: null
+extra: null
+uuid: fda98ab8-f550-45f1-9ded-0113f3e67260
+metrics:
+- metric_name: count
+ verbose_name: null
+ metric_type: null
+ expression: count(*)
+ description: null
+ d3format: null
+ extra: {}
+ warning_text: null
+columns:
+- column_name: data_key
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: LowCardinality(String)
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: start_time
+ verbose_name: null
+ is_dttm: true
+ is_active: true
+ type: DateTime
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: node_label
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: String
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: prev_label
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: String
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: flow_label
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: String
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: context_id
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: String
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: request_id
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: String
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: data
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: String
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+- column_name: label
+ verbose_name: null
+ is_dttm: false
+ is_active: true
+ type: String
+ advanced_data_type: null
+ groupby: true
+ filterable: true
+ expression: null
+ description: null
+ python_date_format: null
+ extra: {}
+version: 1.0.0
+database_uuid: ee32f76e-55e3-483a-935a-22d03072db23
diff --git a/dff/config/superset_dashboard/metadata.yaml b/dff/config/superset_dashboard/metadata.yaml
new file mode 100644
index 000000000..b68dadba1
--- /dev/null
+++ b/dff/config/superset_dashboard/metadata.yaml
@@ -0,0 +1,3 @@
+version: 1.0.0
+type: Dashboard
+timestamp: '2023-07-25T14:27:32.324677+00:00'
diff --git a/dff/pipeline/types.py b/dff/pipeline/types.py
index 0a1e679fe..39584a303 100644
--- a/dff/pipeline/types.py
+++ b/dff/pipeline/types.py
@@ -110,6 +110,13 @@ class ExtraHandlerType(str, Enum):
class ServiceRuntimeInfo(BaseModel):
+ """
+    Type of object that is passed to components at runtime.
+    Contains current component info (`name`, `path`, `timeout`, `asynchronous`).
+    Also contains `execution_state` - a dictionary
+    containing execution states of other components mapped to their paths.
+ """
+
name: str
path: str
timeout: Optional[float]
@@ -117,14 +124,6 @@ class ServiceRuntimeInfo(BaseModel):
execution_state: Dict[str, ComponentExecutionState]
-"""
-Type of object, that is passed to components in runtime.
-Contains current component info (`name`, `path`, `timeout`, `asynchronous`).
-Also contains `execution_state` - a dictionary,
-containing execution states of other components mapped to their paths.
-"""
-
-
ExtraHandlerFunction: TypeAlias = Union[
Callable[[Context], Any],
Callable[[Context, _ForwardPipeline], Any],
diff --git a/dff/stats/__init__.py b/dff/stats/__init__.py
index 973e6e2d5..058cd7fbd 100644
--- a/dff/stats/__init__.py
+++ b/dff/stats/__init__.py
@@ -1,2 +1,10 @@
# -*- coding: utf-8 -*-
# flake8: noqa: F401
+
+from . import exporter_patch
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
+from opentelemetry.sdk.trace.export import ConsoleSpanExporter
+from opentelemetry.sdk._logs.export import InMemoryLogExporter, ConsoleLogExporter
+from opentelemetry.sdk.metrics.export import InMemoryMetricReader, ConsoleMetricExporter
+from .utils import get_wrapper_field, set_logger_destination, set_tracer_destination
+from .instrumentor import OtelInstrumentor, OTLPMetricExporter, OTLPLogExporter, OTLPSpanExporter
diff --git a/dff/stats/__main__.py b/dff/stats/__main__.py
new file mode 100644
index 000000000..396273ea2
--- /dev/null
+++ b/dff/stats/__main__.py
@@ -0,0 +1,111 @@
+"""
+Main
+----
+This module includes command line scripts for Superset dashboard configuration,
+e.g. for creating and importing configuration archives.
+In a configuration archive, you can define such settings as passwords, networking addresses etc.
+using your own parameters that can be passed as a config file and overridden by command line arguments.
+
+Examples
+********
+
+.. code:: bash
+
+ # Create and import a configuration archive.
+ # The import overrides existing dashboard configurations.
+ dff.stats config.yaml \\
+ -U superset_user \\
+ -P superset_password \\
+ -dP database_password \\
+ --db.user=database_user \\
+ --db.host=clickhouse \\
+ --db.port=8123 \\
+ --db.name=test \\
+ --db.table=otel_logs \\
+ --outfile=config_artifact.zip
+
+"""
+import sys
+import argparse
+from typing import Optional
+
+from .cli import import_dashboard, make_zip_config
+from .utils import PasswordAction
+
+
+def main(parsed_args: Optional[argparse.Namespace] = None):
+ """
+    Function that invokes procedures defined in the `cli` module.
+
+ :param parsed_args: Set of command line arguments. If passed, overrides the command line contents.
+ See the module docs for reference.
+ """
+ parser = argparse.ArgumentParser(
+ usage="""Creates a configuration archive and uploads it to the Superset server.
+ The import overrides existing dashboard configurations if present.
+ The function accepts a yaml file; also, all of the options can also be overridden
+ via the command line. Setting passwords interactively is supported.
+
+ dff.stats config.yaml \\
+ -U superset_user \\
+ -P superset_password \\
+ -dP database_password \\
+ --db.user=database_user \\
+ --db.host=clickhouse \\
+ --db.port=8123 \\
+ --db.name=test \\
+ --db.table=otel_logs \\
+ --outfile=config_artifact.zip
+
+ Use the `--help` flag to get more information."""
+ )
+ parser.add_argument("file", type=str)
+ parser.add_argument(
+ "-dD",
+ "--db.driver",
+ choices=["clickhousedb+connect"],
+ help="DBMS driver.",
+ default="clickhousedb+connect",
+ )
+ parser.add_argument("-dU", "--db.user", help="Database user.")
+ parser.add_argument("-dh", "--db.host", default="clickhouse", help="Database host.")
+ parser.add_argument("-dp", "--db.port", help="Database port.")
+ parser.add_argument("-dn", "--db.name", help="Name of the database.")
+ parser.add_argument("-dt", "--db.table", default="otel_logs", help="Name of the table.")
+ parser.add_argument("-o", "--outfile", help="Optionally persist the configuration as a zip file.")
+ parser.add_argument("-H", "--host", default="localhost", help="Superset host")
+ parser.add_argument("-p", "--port", default="8088", help="Superset port.")
+ parser.add_argument("-U", "--username", required=True, help="Superset user.")
+ parser.add_argument(
+ "-P",
+ "--password",
+ dest="password",
+ type=str,
+ action=PasswordAction,
+ help="Superset password.",
+ nargs="?",
+ required=True,
+ )
+ parser.add_argument(
+ "-dP",
+ "--db.password",
+ dest="db.password",
+ type=str,
+ action=PasswordAction,
+ help="Database password.",
+ required=True,
+ nargs="?",
+ )
+
+ if parsed_args is None:
+ parsed_args = parser.parse_args(sys.argv[1:])
+
+ outfile = make_zip_config(parsed_args)
+ import_dashboard(parsed_args, zip_file=str(outfile))
+
+ if not hasattr(parsed_args, "outfile") or parsed_args.outfile is None:
+ outfile.unlink()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/dff/stats/cli.py b/dff/stats/cli.py
new file mode 100644
index 000000000..20058a990
--- /dev/null
+++ b/dff/stats/cli.py
@@ -0,0 +1,252 @@
+"""
+Command Line Interface
+----------------------
+This module defines commands that can be called via the command line interface.
+
+"""
+import tempfile
+import shutil
+import sys
+import argparse
+import os
+import logging
+from urllib import parse
+from pathlib import Path
+from typing import Optional
+
+try:
+ from omegaconf import OmegaConf
+ from .utils import get_superset_session, drop_superset_assets
+except ImportError:
+ raise ImportError("Some packages are not found. Run `pip install dff[stats]`")
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+DFF_DIR = Path(__file__).absolute().parent.parent
+"""
+Root directory of the local `dff` installation.
+
+:meta hide-value:
+"""
+DASHBOARD_DIR = str(DFF_DIR / "config" / "superset_dashboard")
+"""
+Local path to superset dashboard files to import.
+
+:meta hide-value:
+"""
+DASHBOARD_SLUG = "dff-stats"
+"""
+This variable stores a slug used for building the http address of the DFF dashboard.
+"""
+DEFAULT_SUPERSET_URL = parse.urlunsplit(("http", "localhost:8088", "/", "", ""))
+"""
+Default location of the Superset dashboard.
+"""
+
+TYPE_MAPPING_CH = {
+ "FLOAT": "Nullable(Float64)",
+ "STRING": "Nullable(String)",
+ "LONGINTEGER": "Nullable(Int64)",
+ "INTEGER": "Nullable(Int64)",
+ "DATETIME": "Nullable(DateTime)",
+}
+"""
+Mapping of standard sql column types to Clickhouse native types.
+
+:meta hide-value:
+"""
+
+DFF_NODE_STATS_STATEMENT = """
+WITH main AS (
+ SELECT DISTINCT {table}.LogAttributes['context_id'] as context_id,
+ {table}.LogAttributes['request_id'] as request_id,
+ toDateTime(otel_traces.Timestamp) as start_time,
+ otel_traces.SpanName as data_key,
+ {table}.Body as data,
+ {lblfield} as label,
+ {flowfield} as flow_label,
+ {nodefield} as node_label,
+ {table}.TraceId as trace_id,
+ otel_traces.TraceId\nFROM {table}, otel_traces
+ WHERE {table}.TraceId = otel_traces.TraceId and otel_traces.SpanName = 'get_current_label'
+ ORDER BY context_id, request_id
+) SELECT context_id,
+ request_id,
+ start_time,
+ data_key,
+ data,
+ label,
+ {lag} as prev_label,
+ flow_label,
+ node_label
+FROM main
+WHERE label != ''
+"""
+DFF_ACYCLIC_NODES_STATEMENT = """
+WITH main AS (
+ SELECT DISTINCT {table}.LogAttributes['context_id'] as context_id,
+ {table}.LogAttributes['request_id'] as request_id,
+ {table}.Timestamp as timestamp,
+ {lblfield} as label\nFROM {table}
+ INNER JOIN
+(
+ WITH helper AS (
+ SELECT DISTINCT {table}.LogAttributes['context_id'] as context_id,
+ {table}.LogAttributes['request_id'] as request_id,
+ {lblfield} as label
+ FROM {table}
+ )
+ SELECT context_id FROM helper
+ GROUP BY context_id
+ HAVING COUNT(context_id) = COUNT(DISTINCT label)
+) as plain_ctx
+ON plain_ctx.context_id = context_id
+ORDER by context_id, request_id
+)
+SELECT * FROM main
+"""
+DFF_FINAL_NODES_STATEMENT = """
+WITH main AS (
+ SELECT LogAttributes['context_id'] AS context_id,
+ max(LogAttributes['request_id']) AS max_history
+ FROM {table}\nGROUP BY context_id
+)
+SELECT DISTINCT LogAttributes['context_id'] AS context_id,
+LogAttributes['request_id'] AS request_id,
+{table}.Timestamp AS start_time,
+{lblfield} AS label,
+{flowfield} AS flow_label,
+{nodefield} AS node_label
+FROM {table}
+INNER JOIN main
+ON context_id = main.context_id
+AND request_id = main.max_history
+INNER JOIN otel_traces
+ON {table}.TraceId = otel_traces.TraceId
+WHERE otel_traces.SpanName = 'get_current_label'
+"""
+
+SQL_STATEMENT_MAPPING = {
+ "dff_acyclic_nodes.yaml": DFF_ACYCLIC_NODES_STATEMENT,
+ "dff_node_stats.yaml": DFF_NODE_STATS_STATEMENT,
+ "dff_final_nodes.yaml": DFF_FINAL_NODES_STATEMENT,
+}
+"""
+Select statements for dashboard configuration with names and types represented as placeholders.
+The placeholder system makes queries database agnostic, required values are set during the import phase.
+
+:meta hide-value:
+"""
+
+
+def import_dashboard(parsed_args: Optional[argparse.Namespace] = None, zip_file: Optional[str] = None):
+    """
+    Import an Apache Superset dashboard to a local instance with specified arguments.
+    Before using the command, make sure you have your Superset instance
+    up and running: `ghcr.io/deeppavlov/superset_df_dashboard:latest`.
+    The import will override existing dashboard configurations if present.
+
+    :param parsed_args: Command line arguments produced by `argparse`.
+    :param zip_file: Zip archived dashboard config.
+    """
+    # Fall back to the default Superset location when host/port were not supplied.
+    host = parsed_args.host if hasattr(parsed_args, "host") else "localhost"
+    port = parsed_args.port if hasattr(parsed_args, "port") else "8088"
+    superset_url = parse.urlunsplit(("http", f"{host}:{port}", "/", "", ""))
+    zip_filename = os.path.basename(zip_file)
+    # The `db.password` attribute name contains a dot, so it is only reachable via getattr.
+    db_password = getattr(parsed_args, "db.password")
+
+    session, headers = get_superset_session(parsed_args, superset_url)
+    # Remove pre-existing dashboards, charts, datasets and databases so the
+    # import does not conflict with stale assets.
+    drop_superset_assets(session, headers, superset_url)
+    import_dashboard_url = parse.urljoin(superset_url, "/api/v1/dashboard/import/")
+    # upload files
+    with open(zip_file, "rb") as f:
+        response = session.request(
+            "POST",
+            import_dashboard_url,
+            headers=headers,
+            data={
+                # Superset expects a JSON mapping of database config file -> password.
+                "passwords": '{"databases/dff_database.yaml":"' + db_password + '"}',
+                "overwrite": "true",
+            },
+            files=[("formData", (zip_filename, f, "application/zip"))],
+        )
+        response.raise_for_status()
+    logger.info(f"Upload finished with status {response.status_code}.")
+
+
+def make_zip_config(parsed_args: argparse.Namespace) -> Path:
+    """
+    Make a zip-archived Apache Superset dashboard config, using specified arguments.
+
+    :param parsed_args: Command line arguments produced by `argparse`.
+    :return: Path to the produced zip archive.
+    """
+    if hasattr(parsed_args, "outfile") and parsed_args.outfile:
+        outfile_name = parsed_args.outfile
+    else:
+        # No outfile requested: the caller is expected to delete this temporary archive.
+        outfile_name = "temp.zip"
+
+    file_conf = OmegaConf.load(parsed_args.file)
+    # Re-feed the CLI arguments through OmegaConf so that dotted keys (e.g. `db.user`)
+    # become nested config entries; command-line values override the file config.
+    sys.argv = [__file__] + [f"{key}={value}" for key, value in parsed_args.__dict__.items() if value]
+    cmd_conf = OmegaConf.from_cli()
+    cli_conf = OmegaConf.merge(file_conf, cmd_conf)
+
+    if OmegaConf.select(cli_conf, "db.driver") == "clickhousedb+connect":
+        # Clickhouse-specific SQL fragments substituted into the statement templates.
+        params = dict(
+            table="${db.table}",
+            lag="neighbor(label, -1)",
+            texttype="String",
+            lblfield="JSON_VALUE(${db.table}.Body, '$.label')",
+            flowfield="JSON_VALUE(${db.table}.Body, '$.flow')",
+            nodefield="JSON_VALUE(${db.table}.Body, '$.node')",
+        )
+    else:
+        raise ValueError("The only supported database driver is 'clickhousedb+connect'.")
+
+    # Render each statement template into a `{filename: {"sql": ...}}` mapping.
+    conf = SQL_STATEMENT_MAPPING.copy()
+    for key in conf.keys():
+        conf[key] = {}
+        conf[key]["sql"] = SQL_STATEMENT_MAPPING[key].format(**params)
+
+    resolve_conf = OmegaConf.create(
+        {
+            "database": {
+                # The password placeholder is kept out of the config on purpose;
+                # the real password is supplied separately at import time.
+                "sqlalchemy_uri": "${db.driver}://${db.user}:XXXXXXXXXX@${db.host}:${db.port}/${db.name}",
+            },
+            **conf,
+        }
+    )
+
+    user_config = OmegaConf.merge(cli_conf, resolve_conf)
+    OmegaConf.resolve(user_config)
+
+    with tempfile.TemporaryDirectory() as temp_config_dir:
+        nested_temp_dir = os.path.join(temp_config_dir, "superset_dashboard")
+        logger.info(f"Copying config files to temporary directory: {nested_temp_dir}.")
+
+        shutil.copytree(DASHBOARD_DIR, nested_temp_dir)
+        database_dir = Path(os.path.join(nested_temp_dir, "databases"))
+        dataset_dir = Path(os.path.join(nested_temp_dir, "datasets/dff_database"))
+
+        logger.info("Overriding the initial configuration.")
+        # overwrite sqlalchemy uri
+        for filepath in database_dir.iterdir():
+            file_config = OmegaConf.load(filepath)
+            new_file_config = OmegaConf.merge(file_config, OmegaConf.select(user_config, "database"))
+            OmegaConf.save(new_file_config, filepath)
+
+        # overwrite sql expressions and column types
+        for filepath in dataset_dir.iterdir():
+            file_config = OmegaConf.load(filepath)
+            new_file_config = OmegaConf.merge(file_config, getattr(user_config, filepath.name))
+            # NOTE(review): the save call only runs inside this branch; dataset configs
+            # would be left unsaved for any other driver. Currently unreachable, since
+            # a non-clickhouse driver raises above — confirm if more drivers are added.
+            if OmegaConf.select(cli_conf, "db.driver") == "clickhousedb+connect":
+                for col in OmegaConf.select(new_file_config, "columns"):
+                    col.type = TYPE_MAPPING_CH.get(col.type, col.type)
+                OmegaConf.save(new_file_config, filepath)
+
+        if ".zip" not in outfile_name:
+            raise ValueError(f"Outfile name missing .zip extension: {outfile_name}.")
+        logger.info(f"Saving the archive to {outfile_name}.")
+        shutil.make_archive(outfile_name[: outfile_name.rindex(".zip")], format="zip", root_dir=temp_config_dir)
+
+    return Path(outfile_name)
diff --git a/dff/stats/default_extractors.py b/dff/stats/default_extractors.py
new file mode 100644
index 000000000..d48cd6d26
--- /dev/null
+++ b/dff/stats/default_extractors.py
@@ -0,0 +1,55 @@
+"""
+Default Extractors
+------------------
+This module includes a pool of default extractors
+that you can use out of the box.
+
+The default configuration for Superset dashboard leverages the data collected
+by the extractors below. In order to use the default charts,
+make sure that you include those functions in your pipeline.
+Detailed examples can be found in the `tutorials` section.
+
+"""
+from datetime import datetime
+
+from dff.script import Context
+from dff.pipeline import ExtraHandlerRuntimeInfo
+from .utils import get_wrapper_field
+
+
+async def get_current_label(ctx: Context, _, info: ExtraHandlerRuntimeInfo):
+    """
+    Extract the current label on each turn.
+    This function is required for running the dashboard with the default configuration.
+
+    :return: A dict with `flow`, `node` and joined `label` keys;
+        all three values are `None` when no label has been set yet.
+    """
+    last_label = ctx.last_label
+    if last_label is None:
+        # No label is available for this context yet.
+        return {"flow": None, "node": None, "label": None}
+    return {"flow": last_label[0], "node": last_label[1], "label": ": ".join(last_label)}
+
+
+async def get_timing_before(ctx: Context, _, info: ExtraHandlerRuntimeInfo):
+    """
+    Extract the pipeline component's start time.
+    This function is required for running the dashboard with the default configuration.
+
+    The function leverages the `framework_states` field of the context to store results.
+    As a result, the function output is cleared on every turn and does not get persisted
+    to the context storage.
+    """
+    start_time = datetime.now()
+    # Stash the start time under a component-specific key so that
+    # `get_timing_after` can compute the elapsed time for the same component.
+    ctx.framework_states[get_wrapper_field(info, "time")] = start_time
+
+
+async def get_timing_after(ctx: Context, _, info: ExtraHandlerRuntimeInfo):  # noqa: F811
+    """
+    Extract the pipeline component's finish time.
+    This function is required for running the dashboard with the default configuration.
+
+    The function leverages the `framework_states` field of the context to store results.
+    As a result, the function output is cleared on every turn and does not get persisted
+    to the context storage.
+
+    :raises KeyError: If `get_timing_before` was not run for the same component this turn.
+    """
+    # Read the start time stored by `get_timing_before` for the same component.
+    start_time = ctx.framework_states[get_wrapper_field(info, "time")]
+    data = {"execution_time": str(datetime.now() - start_time)}
+    return data
diff --git a/dff/stats/exporter_patch.py b/dff/stats/exporter_patch.py
new file mode 100644
index 000000000..f87ca387b
--- /dev/null
+++ b/dff/stats/exporter_patch.py
@@ -0,0 +1,37 @@
+"""
+Exporter patch
+----------------
+This module contains temporary utilities
+that patch Opentelemetry's GRPC log exporter
+which makes it possible to export arbitrary dict-like values.
+"""
+from typing import Mapping
+from wrapt import wrap_function_wrapper as _wrap
+import opentelemetry.exporter.otlp.proto.grpc as otlp_exporter_grpc
+from opentelemetry.exporter.otlp.proto.grpc.exporter import _translate_key_values
+from opentelemetry.proto.common.v1.common_pb2 import AnyValue, KeyValueList
+
+
+def grpc_mapping_translation_patch(translate_value, _, args, kwargs):
+ """
+ This decorator patches the `_translate_value` function
+ from OpenTelemetry GRPC Log exporter module allowing the class
+ to translate values of type `dict` and `NoneType`
+ into the `protobuf` protocol.
+
+ :param _translate_value: The original function.
+ :param args: Positional arguments of the original function.
+ :param kwargs: Keyword arguments of the original function.
+ """
+ translated_value = args[0]
+ if isinstance(translated_value, type(None)):
+ return AnyValue(string_value="")
+ if isinstance(translated_value, Mapping):
+ return AnyValue(
+ kvlist_value=KeyValueList(values=[_translate_key_values(str(k), v) for k, v in translated_value.items()])
+ )
+ else:
+ return translate_value(*args, **kwargs)
+
+
+_wrap(otlp_exporter_grpc, "exporter._translate_value", grpc_mapping_translation_patch)
diff --git a/dff/stats/instrumentor.py b/dff/stats/instrumentor.py
new file mode 100644
index 000000000..a053c532b
--- /dev/null
+++ b/dff/stats/instrumentor.py
@@ -0,0 +1,195 @@
+"""
+Instrumentor
+-------------
+This module contains the :py:class:`~OtelInstrumentor` class that implements
+Opentelemetry's `BaseInstrumentor` interface and allows for automated
+instrumentation of Dialog Flow Framework applications,
+e.g. for automated logging and log export.
+
+For detailed reference, see the :py:class:`~OtelInstrumentor` class.
+"""
+import asyncio
+from typing import Collection, Optional
+
+from wrapt import wrap_function_wrapper, decorator
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from opentelemetry.instrumentation.utils import unwrap
+from opentelemetry.metrics import get_meter, get_meter_provider, Meter
+from opentelemetry.trace import get_tracer, get_tracer_provider, Tracer
+from opentelemetry._logs import get_logger, get_logger_provider, Logger, SeverityNumber
+from opentelemetry.trace import SpanKind, Span
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk._logs import LoggerProvider, LogRecord
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
+from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter
+
+from dff.script.core.context import get_last_index
+from dff.stats.utils import (
+ resource,
+ get_wrapper_field,
+ set_logger_destination,
+ set_meter_destination,
+ set_tracer_destination,
+)
+from dff.stats import default_extractors
+
+
+INSTRUMENTS = ["dff"]
+
+
+class OtelInstrumentor(BaseInstrumentor):
+ """
+ Utility class for instrumenting DFF-related functions
+ that implements the :py:class:`~BaseInstrumentor` interface.
+ :py:meth:`~instrument` and :py:meth:`~uninstrument` methods
+ are available to apply and revert the instrumentation effects,
+ e.g. enable and disable logging at runtime.
+
+ .. code-block::
+
+ dff_instrumentor = OtelInstrumentor()
+ dff_instrumentor.instrument()
+ dff_instrumentor.uninstrument()
+
+ Opentelemetry provider instances can be optionally passed to the class constructor.
+ Otherwise, the global logger, tracer and meter providers are leveraged.
+
+ The class implements the :py:meth:`~__call__` method, so that
+ regular functions can be decorated using the class instance.
+
+ .. code-block::
+
+ @dff_instrumentor
+ def function(context, pipeline, runtime_info):
+ ...
+
+ :param logger_provider: Opentelemetry logger provider. Used to construct a logger instance.
+ :param tracer_provider: Opentelemetry tracer provider. Used to construct a tracer instance.
+ :parame meter_provider: Opentelemetry meter provider. Used to construct a meter instance.
+ """
+
+ def __init__(self, logger_provider=None, tracer_provider=None, meter_provider=None) -> None:
+ super().__init__()
+ self._logger_provider: Optional[LoggerProvider] = None
+ self._tracer_provider: Optional[TracerProvider] = None
+ self._meter_provider: Optional[MeterProvider] = None
+ self._logger: Optional[Logger] = None
+ self._tracer: Optional[Tracer] = None
+ self._meter: Optional[Meter] = None
+ self._configure_providers(
+ logger_provider=logger_provider, tracer_provider=tracer_provider, meter_provider=meter_provider
+ )
+
+ def __enter__(self):
+ if not self.is_instrumented_by_opentelemetry:
+ self.instrument()
+ return self
+
+ def __exit__(self):
+ if self.is_instrumented_by_opentelemetry:
+ self.uninstrument()
+
+ @classmethod
+ def from_url(cls, url: str, insecure: bool = True, timeout: Optional[int] = None):
+ """
+ Construct an instrumentor instance using only the url of the OTLP Collector.
+ Inherently modifies the global provider instances adding an export destination
+ for the target url.
+
+ .. code-block::
+
+ instrumentor = OtelInstrumentor.from_url("grpc://localhost:4317")
+
+ :param url: Url of the running Otel Collector server. Due to limited support of HTTP protocol
+ by the Opentelemetry Python extension, GRPC protocol is preferred.
+ :param insecure: Whether non-SSL-protected connection is allowed. Defaults to True.
+ :param timeout: Connection timeout in seconds, optional.
+ """
+ set_logger_destination(OTLPLogExporter(endpoint=url, insecure=insecure, timeout=timeout))
+ set_tracer_destination(OTLPSpanExporter(endpoint=url, insecure=insecure, timeout=timeout))
+ set_meter_destination(OTLPMetricExporter(endpoint=url, insecure=insecure, timeout=timeout))
+ return cls()
+
+ def instrumentation_dependencies(self) -> Collection[str]:
+ """
+ :meta private:
+
+ Required libraries. Implements the Python Opentelemetry instrumentor interface.
+
+ """
+ return INSTRUMENTS
+
+ def _instrument(self, logger_provider=None, tracer_provider=None, meter_provider=None):
+ if any([logger_provider, meter_provider, tracer_provider]):
+ self._configure_providers(
+ logger_provider=logger_provider, tracer_provider=tracer_provider, meter_provider=meter_provider
+ )
+ wrap_function_wrapper(default_extractors, "get_current_label", self.__call__.__wrapped__)
+ wrap_function_wrapper(default_extractors, "get_timing_before", self.__call__.__wrapped__)
+ wrap_function_wrapper(default_extractors, "get_timing_after", self.__call__.__wrapped__)
+
+ def _uninstrument(self, **kwargs):
+ unwrap(default_extractors, "get_current_label")
+ unwrap(default_extractors, "get_timing_before")
+ unwrap(default_extractors, "get_timing_after")
+
+ def _configure_providers(self, logger_provider, tracer_provider, meter_provider):
+ self._logger_provider = logger_provider or get_logger_provider()
+ self._tracer_provider = tracer_provider or get_tracer_provider()
+ self._meter_provider = meter_provider or get_meter_provider()
+ self._logger = get_logger(__name__, None, self._logger_provider)
+ self._tracer = get_tracer(__name__, None, self._tracer_provider)
+ self._meter = get_meter(__name__, None, self._meter_provider)
+
+ @decorator
+ async def __call__(self, wrapped, _, args, kwargs):
+ """
+ Regular functions that match the :py:class:`~dff.pipeline.types.ExtraHandlerFunction`
+ signature can be decorated with the class instance to log the returned value.
+ This method implements the logging procedure.
+ The returned value is assumed to be `dict` or `NoneType`.
+ Logging non-atomic values is discouraged, as they cannot be translated using
+ the `Protobuf` protocol.
+ Logging is ignored if the application is in 'uninstrumented' state.
+
+ :param wrapped: Function to decorate.
+ :param args: Positional arguments of the decorated function.
+ :param kwargs: Keyword arguments of the decorated function.
+ """
+ ctx, _, info = args
+ pipeline_component = get_wrapper_field(info)
+ attributes = {
+ "context_id": str(ctx.id),
+ "request_id": get_last_index(ctx.requests),
+ "pipeline_component": pipeline_component,
+ }
+
+ result: Optional[dict]
+ if asyncio.iscoroutinefunction(wrapped):
+ result = await wrapped(ctx, _, info)
+ else:
+ result = wrapped(ctx, _, info)
+
+ if result is None or not self.is_instrumented_by_opentelemetry:
+ # self.is_instrumented_by_opentelemetry allows to disable
+ # the decorator programmatically if
+ # instrumentation is disabled.
+ return result
+
+ span: Span
+ with self._tracer.start_as_current_span(wrapped.__name__, kind=SpanKind.INTERNAL) as span:
+ span_ctx = span.get_span_context()
+ record = LogRecord(
+ span_id=span_ctx.span_id,
+ trace_id=span_ctx.trace_id,
+ body=result,
+ trace_flags=span_ctx.trace_flags,
+ severity_text=None,
+ severity_number=SeverityNumber(1),
+ resource=resource,
+ attributes=attributes,
+ )
+ self._logger.emit(record=record)
+ return result
diff --git a/dff/stats/utils.py b/dff/stats/utils.py
new file mode 100644
index 000000000..9f0426864
--- /dev/null
+++ b/dff/stats/utils.py
@@ -0,0 +1,203 @@
+"""
+Utils
+-----
+This module includes utility functions designed for statistics collection.
+
+The functions below can be used to configure the opentelemetry destination.
+
+.. code:: python
+
+ set_logger_destination(OTLPLogExporter("grpc://localhost:4317", insecure=True))
+ set_tracer_destination(OTLPSpanExporter("grpc://localhost:4317", insecure=True))
+
+"""
+import json
+import getpass
+from urllib import parse
+from typing import Optional, Tuple
+from argparse import Namespace, Action
+
+import requests
+from . import exporter_patch # noqa: F401
+from opentelemetry.sdk.resources import Resource
+from opentelemetry._logs import get_logger_provider, set_logger_provider
+from opentelemetry.trace import get_tracer_provider, set_tracer_provider
+from opentelemetry.metrics import get_meter_provider, set_meter_provider
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk._logs import LoggerProvider
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter, SpanExporter
+from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter, LogExporter
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter, MetricExporter
+
+from dff.pipeline import ExtraHandlerRuntimeInfo
+
+SERVICE_NAME = "dialog_flow_framework"
+
+resource = Resource.create({"service.name": SERVICE_NAME})
+"""
+Singleton :py:class:`~Resource` instance shared inside the framework.
+"""
+tracer_provider = TracerProvider(resource=resource)
+"""
+Global tracer provider bound to the DFF resource.
+"""
+logger_provider = LoggerProvider(resource=resource)
+"""
+Global logger provider bound to the DFF resource.
+"""
+set_logger_provider(logger_provider)
+set_tracer_provider(tracer_provider)
+
+
+def set_logger_destination(exporter: Optional[LogExporter] = None):
+    """
+    Configure the global Opentelemetry logger provider to export logs to the given destination.
+
+    :param exporter: Opentelemetry log exporter instance.
+        Defaults to a GRPC exporter targeting `localhost:4317`.
+    """
+    if exporter is None:
+        exporter = OTLPLogExporter(endpoint="grpc://localhost:4317", insecure=True)
+    # Each call appends a new processor; calling repeatedly adds multiple destinations.
+    get_logger_provider().add_log_record_processor(BatchLogRecordProcessor(exporter))
+
+
+def set_meter_destination(exporter: Optional[MetricExporter] = None):
+    """
+    Configure the global Opentelemetry meter provider to export metrics to the given destination.
+
+    :param exporter: Opentelemetry meter exporter instance.
+        Defaults to a GRPC exporter targeting `localhost:4317`.
+    """
+    if exporter is None:
+        exporter = OTLPMetricExporter(endpoint="grpc://localhost:4317", insecure=True)
+    cur_meter_provider = get_meter_provider()
+    new_meter_provider = MeterProvider(resource=resource, metric_readers=[PeriodicExportingMetricReader(exporter)])
+    # The SDK meter provider receives its readers at construction time, so the
+    # global provider is only replaced when it is not an SDK provider yet.
+    # NOTE(review): if a MeterProvider is already set, the new exporter is
+    # silently discarded — confirm that this is the intended behavior.
+    if not isinstance(cur_meter_provider, MeterProvider):
+        set_meter_provider(new_meter_provider)
+
+
+def set_tracer_destination(exporter: Optional[SpanExporter] = None):
+    """
+    Configure the global Opentelemetry tracer provider to export traces to the given destination.
+
+    :param exporter: Opentelemetry span exporter instance.
+        Defaults to a GRPC exporter targeting `localhost:4317`.
+    """
+    if exporter is None:
+        exporter = OTLPSpanExporter(endpoint="grpc://localhost:4317", insecure=True)
+    # Each call appends a new processor; calling repeatedly adds multiple destinations.
+    get_tracer_provider().add_span_processor(BatchSpanProcessor(exporter))
+
+
+def get_wrapper_field(info: ExtraHandlerRuntimeInfo, postfix: str = "") -> str:
+ """
+ This function can be used to obtain a key, under which the wrapper data will be stored
+ in the context.
+
+ :param info: Handler runtime info obtained from the pipeline.
+ :param postfix: Field-specific postfix that will be appended to the field name.
+ """
+ path = info.component.path.replace(".", "-")
+ return f"{path}" + (f"-{postfix}" if postfix else "")
+
+
+def get_superset_session(args: Namespace, base_url: str = "http://localhost:8088/") -> Tuple[requests.Session, dict]:
+ """
+ Utility function for authorized interaction with Superset HTTP API.
+
+ :param args: Command line arguments including Superset username and Superset password.
+ :param base_url: Base Superset URL.
+
+ :return: Authorized session - authorization headers tuple.
+ """
+ healthcheck_url = parse.urljoin(base_url, "/healthcheck")
+ login_url = parse.urljoin(base_url, "/api/v1/security/login")
+ csrf_url = parse.urljoin(base_url, "/api/v1/security/csrf_token/")
+
+ session = requests.Session()
+ # do healthcheck
+ response = session.get(healthcheck_url, timeout=10)
+ response.raise_for_status()
+ # get access token
+ access_request = session.post(
+ login_url,
+ headers={"Content-Type": "application/json", "Accept": "*/*"},
+ data=json.dumps({"username": args.username, "password": args.password, "refresh": True, "provider": "db"}),
+ )
+ access_token = access_request.json()["access_token"]
+ # get csrf_token
+ csrf_request = session.get(csrf_url, headers={"Authorization": f"Bearer {access_token}"})
+ csrf_token = csrf_request.json()["result"]
+ headers = {
+ "Authorization": f"Bearer {access_token}",
+ "X-CSRFToken": csrf_token,
+ }
+ return session, headers
+
+
+def drop_superset_assets(session: requests.Session, headers: dict, base_url: str):
+ """
+ Drop the existing assets from the Superset dashboard.
+
+ :param session: Authorized Superset session.
+ :param headers: Superset session headers.
+ :param base_url: Base Superset URL.
+ """
+ dashboard_url = parse.urljoin(base_url, "/api/v1/dashboard")
+ charts_url = parse.urljoin(base_url, "/api/v1/chart")
+ datasets_url = parse.urljoin(base_url, "/api/v1/dataset")
+ database_url = parse.urljoin(base_url, "/api/v1/database/")
+ delete_res: requests.Response
+
+ dashboard_res = session.get(dashboard_url, headers=headers)
+ dashboard_json = dashboard_res.json()
+ if dashboard_json["count"] > 0:
+ delete_res = requests.delete(dashboard_url, params={"q": json.dumps(dashboard_json["ids"])}, headers=headers)
+ delete_res.raise_for_status()
+
+ charts_result = session.get(charts_url, headers=headers)
+ charts_json = charts_result.json()
+ if charts_json["count"] > 0:
+ delete_res = requests.delete(charts_url, params={"q": json.dumps(charts_json["ids"])}, headers=headers)
+ delete_res.raise_for_status()
+
+ datasets_result = session.get(datasets_url, headers=headers)
+ datasets_json = datasets_result.json()
+ if datasets_json["count"] > 0:
+ delete_res = requests.delete(datasets_url, params={"q": json.dumps(datasets_json["ids"])}, headers=headers)
+ delete_res.raise_for_status()
+
+ database_res = session.get(database_url, headers=headers)
+ database_json = database_res.json()
+ if database_json["count"] > 0:
+ delete_res = requests.delete(database_url + str(database_json["ids"][-1]), headers=headers)
+ delete_res.raise_for_status()
+
+
+class PasswordAction(Action):
+    """
+    Child class for Argparse's :py:class:`~Action` that prompts users for passwords interactively,
+    ensuring password safety, unless the password is specified directly.
+
+    """
+
+    def __init__(
+        self, option_strings, dest=None, nargs=0, default=None, required=False, type=None, metavar=None, help=None
+    ):
+        # Mirror the parent signature so argparse can construct the action via
+        # `parser.add_argument(..., action=PasswordAction)`.
+        super().__init__(
+            option_strings=option_strings,
+            dest=dest,
+            nargs=nargs,
+            default=default,
+            required=required,
+            metavar=metavar,
+            type=type,
+            help=help,
+        )
+
+    def __call__(self, parser, args, values, option_string=None):
+        # A non-empty value was passed on the command line: accept it, but warn
+        # the user, since shell history may retain the plain-text password.
+        if values:
+            print(f"{self.dest}: setting passwords explicitly through the command line is discouraged.")
+            setattr(args, self.dest, values)
+        else:
+            # No value supplied: fall back to an interactive, echo-free prompt.
+            setattr(args, self.dest, getpass.getpass(prompt=f"{self.dest}: "))
diff --git a/dff/utils/docker/README.md b/dff/utils/docker/README.md
new file mode 100644
index 000000000..6caf4490f
--- /dev/null
+++ b/dff/utils/docker/README.md
@@ -0,0 +1,11 @@
+# DFF Docker utils
+
+## Description
+
+This directory provides Docker files, necessary for deployment
+of various DFF utilities.
+
+## Contents
+
+* dockerfile_stats - Dockerfile for DFF statistics dashboard.
+* entrypoint_stats.sh - Entrypoint script for DFF statistics dashboard.
\ No newline at end of file
diff --git a/dff/utils/docker/dockerfile_stats b/dff/utils/docker/dockerfile_stats
new file mode 100644
index 000000000..5017fd975
--- /dev/null
+++ b/dff/utils/docker/dockerfile_stats
@@ -0,0 +1,6 @@
+FROM apache/superset:2.1.0rc1
+USER root
+RUN cd /app && pip install .[clickhouse]
+COPY entrypoint_stats.sh /app/docker/
+USER superset
+ENTRYPOINT ["/bin/bash","/app/docker/entrypoint_stats.sh"]
\ No newline at end of file
diff --git a/dff/utils/docker/entrypoint_stats.sh b/dff/utils/docker/entrypoint_stats.sh
new file mode 100644
index 000000000..663a39b75
--- /dev/null
+++ b/dff/utils/docker/entrypoint_stats.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+export SERVER_THREADS_AMOUNT=8
+set -m
+nohup /bin/bash /usr/bin/run-server.sh &
+superset fab create-admin --firstname superset --lastname admin --username $SUPERSET_USERNAME --email admin@admin.com --password $SUPERSET_PASSWORD
+superset db upgrade
+superset init
+fg
\ No newline at end of file
diff --git a/dff/utils/otel/otelcol-config-extras.yml b/dff/utils/otel/otelcol-config-extras.yml
new file mode 100644
index 000000000..240bb5174
--- /dev/null
+++ b/dff/utils/otel/otelcol-config-extras.yml
@@ -0,0 +1,30 @@
+# Copyright The OpenTelemetry Authors
+# SPDX-License-Identifier: Apache-2.0
+# extra settings to be merged into OpenTelemetry Collector configuration
+# do not delete this file
+exporters:
+ clickhouse:
+ endpoint: tcp://clickhouse:9000
+ database: test
+ username: username
+ password: pass
+ logs_table_name: otel_logs
+ traces_table_name: otel_traces
+ metrics_table_name: otel_metrics
+ timeout: 5s
+ retry_on_failure:
+ enabled: true
+ initial_interval: 5s
+ max_interval: 30s
+ max_elapsed_time: 300s
+
+service:
+ pipelines:
+ traces:
+ receivers: [otlp]
+ processors: [batch]
+ exporters: [logging, clickhouse]
+ logs:
+ receivers: [otlp]
+ processors: [batch]
+ exporters: [logging, clickhouse]
\ No newline at end of file
diff --git a/dff/utils/otel/otelcol-config.yml b/dff/utils/otel/otelcol-config.yml
new file mode 100644
index 000000000..f152c8c35
--- /dev/null
+++ b/dff/utils/otel/otelcol-config.yml
@@ -0,0 +1,40 @@
+# Copyright The OpenTelemetry Authors
+# SPDX-License-Identifier: Apache-2.0
+
+
+receivers:
+ otlp:
+ protocols:
+ grpc:
+ http:
+ cors:
+ allowed_origins:
+ - "http://*"
+ - "https://*"
+
+exporters:
+ logging:
+
+processors:
+ batch:
+ filter:
+ metrics:
+ exclude:
+ match_type: strict
+ metric_names:
+ - queueSize
+
+service:
+ pipelines:
+ metrics:
+ receivers: [otlp]
+ processors: [filter, batch]
+ exporters: [logging]
+ logs:
+ receivers: [otlp]
+ processors: [batch]
+ exporters: [logging]
+ traces:
+ receivers: [otlp]
+ processors: [batch]
+ exporters: [logging]
\ No newline at end of file
diff --git a/dff/utils/testing/telegram.py b/dff/utils/testing/telegram.py
index 720dce6a6..d06ba3bb6 100644
--- a/dff/utils/testing/telegram.py
+++ b/dff/utils/testing/telegram.py
@@ -59,7 +59,7 @@ class TelegramTesting: # pragma: no cover
:param pipeline:
Pipeline with the telegram messenger interface.
Required for :py:meth:`~dff.utils.testing.telegram.TelegramTesting.send_and_check` and
- :py:meth`~dff.utils.testing.telegram.TelegramTesting.check_happy_path` with `run_bot=True`
+ :py:meth:`~dff.utils.testing.telegram.TelegramTesting.check_happy_path` with `run_bot=True`
:param api_credentials:
Telegram API id and hash.
Obtainable via https://core.telegram.org/api/obtaining_api_id.
diff --git a/docker-compose.yml b/docker-compose.yml
index dc1f5bca7..9c33c632a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -34,3 +34,35 @@ services:
# - 2135:2135
- 2136:2136
hostname: localhost
+ dashboard:
+ env_file: [.env_file]
+ build:
+ context: ./dff/utils/docker
+ dockerfile: dockerfile_stats
+ image: ghcr.io/deeppavlov/superset_df_dashboard:latest
+ ports:
+ - "8088:8088"
+ clickhouse:
+ env_file: [.env_file]
+ image: clickhouse/clickhouse-server:latest
+ restart: unless-stopped
+ ports:
+ - '8123:8123'
+ - '8443:8443'
+ - '9000:9000'
+ volumes:
+ - ch-data:/var/lib/clickhouse/
+ otelcol:
+ image: otel/opentelemetry-collector-contrib:latest
+ container_name: otel-col
+ restart: unless-stopped
+ command: [ "--config=/etc/otelcol-config.yml", "--config=/etc/otelcol-config-extras.yml" ]
+ volumes:
+ - ./dff/utils/otel/otelcol-config.yml:/etc/otelcol-config.yml:ro
+ - ./dff/utils/otel/otelcol-config-extras.yml:/etc/otelcol-config-extras.yml:ro
+ ports:
+ - "4317:4317" # OTLP over gRPC receiver
+ - "4318:4318" # OTLP over HTTP receiver
+volumes:
+ ch-data:
+ name: "ch-data"
diff --git a/docs/source/_static/images/additional_stats.png b/docs/source/_static/images/additional_stats.png
new file mode 100644
index 000000000..2b9d2c3f5
Binary files /dev/null and b/docs/source/_static/images/additional_stats.png differ
diff --git a/docs/source/_static/images/databases.png b/docs/source/_static/images/databases.png
new file mode 100644
index 000000000..167c4ce61
Binary files /dev/null and b/docs/source/_static/images/databases.png differ
diff --git a/docs/source/_static/images/general_stats.png b/docs/source/_static/images/general_stats.png
new file mode 100644
index 000000000..660f2ed89
Binary files /dev/null and b/docs/source/_static/images/general_stats.png differ
diff --git a/docs/source/_static/images/overview.png b/docs/source/_static/images/overview.png
new file mode 100644
index 000000000..5bd0fb6a1
Binary files /dev/null and b/docs/source/_static/images/overview.png differ
diff --git a/docs/source/_static/images/service_stats.png b/docs/source/_static/images/service_stats.png
new file mode 100644
index 000000000..7ce724286
Binary files /dev/null and b/docs/source/_static/images/service_stats.png differ
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 4bc3731dd..47ee31a1b 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -170,6 +170,7 @@ def setup(_):
],
),
("tutorials.utils", "Utils"),
+ ("tutorials.stats", "Stats"),
]
)
regenerate_apiref(
@@ -178,6 +179,7 @@ def setup(_):
("dff.messengers", "Messenger Interfaces"),
("dff.pipeline", "Pipeline"),
("dff.script", "Script"),
+ ("dff.stats", "Stats"),
("dff.utils.testing", "Utils"),
]
)
diff --git a/docs/source/user_guides.rst b/docs/source/user_guides.rst
index cdb90a035..8724a1489 100644
--- a/docs/source/user_guides.rst
+++ b/docs/source/user_guides.rst
@@ -9,8 +9,17 @@ those include but are not limited to: dialog graph creation, specifying start an
setting transitions and conditions, using ``Context`` object in order to receive information
about current script execution.
+:doc:`Superset guide <./user_guides/superset_guide>`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``superset guide`` tutorial highlights the usage of Superset visualization tool
+for exploring the telemetry data collected from your conversational services.
+We show how to plug in the telemetry collection and configure the pre-built
+Superset dashboard shipped with DFF.
+
.. toctree::
:hidden:
user_guides/basic_conceptions
+ user_guides/superset_guide
diff --git a/docs/source/user_guides/superset_guide.rst b/docs/source/user_guides/superset_guide.rst
new file mode 100644
index 000000000..5add23bde
--- /dev/null
+++ b/docs/source/user_guides/superset_guide.rst
@@ -0,0 +1,118 @@
+Superset guide
+---------------------
+
+Description
+~~~~~~~~~~~
+
+| The Dialog Flow Stats module can be used to obtain and visualize usage statistics for your service.
+| The module relies on several open source solutions that allow for data persistence and visualization
+
+* `Clickhouse <https://clickhouse.com/>`_ serves as an OLAP storage for data.
+* Batching and preprocessing data is based on the `OpenTelemetry protocol <https://opentelemetry.io/docs/specs/otlp/>`_ and the `OpenTelemetry collector <https://opentelemetry.io/docs/collector/>`_.
+* Interactive visualization is powered by `Apache Superset <https://superset.apache.org/>`_.
+
+All the mentioned services are shipped as Docker containers, including a pre-built Superset image that ensures API compatibility.
+
+Collection procedure
+~~~~~~~~~~~~~~~~~~~~
+
+**Installation**
+
+.. code-block:: shell
+
+ pip install dff[stats]
+
+**Launching services**
+
+.. code-block:: shell
+ :linenos:
+
+ # clone the original repository to access the docker-compose file
+ git clone https://github.com/deeppavlov/dialog_flow_framework.git
+ # launch the required services
+ cd dialog_flow_framework
+ docker-compose up otelcol clickhouse dashboard
+
+**Collecting data**
+
+Collecting data is done by means of instrumenting your conversational service before you run it.
+DFF tutorials showcase all the necessary steps needed to achieve that. We will run
+`one of those files <../tutorials/tutorials.stats.1_extractor_functions.py>`_
+in order to obtain sample data points to visualize.
+
+.. code-block:: shell
+
+ export DISABLE_INTERACTIVE_MODE=1 && python tutorials/stats/1_extractor_functions.py
+
+Displaying the data
+~~~~~~~~~~~~~~~~~~~
+
+In order to display the Superset dashboard, you should update the default configuration with the credentials of your database.
+The configuration can be optionally saved as a zip archive for inspection / debug.
+
+You can set most of the configuration options using a YAML file.
+The default example file can be found in the `tutorials/stats` directory:
+
+.. code-block:: yaml
+ :linenos:
+
+ # tutorials/stats/example_config.yaml
+ db:
+ driver: clickhousedb+connect
+ name: test
+ user: username
+ host: clickhouse
+ port: 8123
+ table: otel_logs
+
+The file can then be used to parametrize the configuration script.
+
+.. code-block:: shell
+
+ dff.stats tutorials/stats/example_config.yaml -P superset -dP pass -U superset --outfile=config_artifact.zip
+
+.. warning::
+
+ Here we passed passwords via CLI, which is not recommended. For enhanced security, call the command above omitting the passwords (`dff.stats -P -dP -U superset ...`) and you will be prompted to enter them interactively.
+
+Running the command will automatically import the dashboard as well as the data sources
+into the running superset server. If you are using a version of Superset different from the one
+shipped with DFF, make sure that your access rights are sufficient to edit the workspace.
+
+Using Superset
+~~~~~~~~~~~~~~
+
+| In order to view the imported dashboard, log into `Superset <http://localhost:8088/>`_ using your username and password (which are both `superset` by default and can be configured via `.env_file`).
+| The dashboard will then be available in the **Dashboards** section of the Superset UI under the name of **DFF stats**.
+| The dashboard has four sections, each one of them containing different kinds of data.
+
+* The **Overview** section summarizes the information about user interaction with your script and displays a weighted graph of transitions from one node to another. The data is also shown in the form of a table for better introspection capabilities.
+
+.. figure:: ../_static/images/overview.png
+
+ Overview plots.
+
+* The data displayed in the **General stats** section reports how frequently each of the nodes in your script was visited by users. The information is aggregated in several forms for better interpretability.
+
+.. figure:: ../_static/images/general_stats.png
+
+ General stats plots.
+
+* The **Additional stats** section includes charts for node visit counts aggregated over various specific variables.
+
+.. figure:: ../_static/images/additional_stats.png
+
+ Additional stats plots.
+
+* General service load data aggregated over time can be found in the **Service stats** section.
+
+.. figure:: ../_static/images/service_stats.png
+
+ Service stats plots.
+
+On some occasions, Superset can show warnings about the database connection being faulty.
+In that case, you can navigate to the `Database Connections` section through the `Settings` menu and edit the `dff_database` instance updating the credentials.
+
+.. figure:: ../_static/images/databases.png
+
+ Locate the database settings in the right corner of the screen.
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 70358188a..cdafaaec7 100644
--- a/setup.py
+++ b/setup.py
@@ -79,6 +79,32 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]:
"pytelegrambotapi",
]
+requests_requirements = [
+ "requests==2.31.0",
+]
+
+otl_dependencies = [
+ "opentelemetry-api==1.17.0",
+ "opentelemetry-exporter-otlp==1.17.0",
+ "opentelemetry-exporter-otlp-proto-grpc==1.17.0",
+ "opentelemetry-exporter-otlp-proto-http==1.17.0",
+ "opentelemetry-instrumentation==0.38b0",
+ "opentelemetry-proto==1.17.0",
+ "opentelemetry-sdk==1.17.0",
+ "opentelemetry-semantic-conventions==0.38b0",
+]
+
+stats_dependencies = merge_req_lists(
+ _sql_dependencies,
+ requests_requirements,
+ otl_dependencies,
+ [
+ "wrapt==1.15.0",
+ "tqdm==4.62.3",
+ "omegaconf>=2.2.2",
+ ],
+)
+
full = merge_req_lists(
core,
async_files_dependencies,
@@ -88,13 +114,10 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]:
mysql_dependencies,
postgresql_dependencies,
ydb_dependencies,
+ stats_dependencies,
telegram_dependencies,
)
-requests_requirements = [
- "requests==2.31.0",
-]
-
test_requirements = merge_req_lists(
[
"pytest==7.4.0",
@@ -105,6 +128,9 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]:
"click==8.1.3",
"black==23.7.0",
"isort==5.12.0",
+ "aiochclient>=2.2.0",
+ "httpx<=0.23.0",
+ "sqlparse==0.4.4",
],
requests_requirements,
)
@@ -162,6 +188,7 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]:
mypy_dependencies,
)
+
EXTRA_DEPENDENCIES = {
"core": core, # minimal dependencies (by default)
"json": async_files_dependencies, # dependencies for using JSON
@@ -172,6 +199,7 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]:
"mysql": mysql_dependencies, # dependencies for using MySQL
"postgresql": postgresql_dependencies, # dependencies for using PostgreSQL
"ydb": ydb_dependencies, # dependencies for using Yandex Database
+ "stats": stats_dependencies, # dependencies for statistics collection
"telegram": telegram_dependencies, # dependencies for using Telegram
"full": full, # full dependencies including all options above
"tests": test_requirements, # dependencies for running tests
@@ -210,4 +238,5 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]:
install_requires=core,
test_suite="tests",
extras_require=EXTRA_DEPENDENCIES,
+ entry_points={"console_scripts": ["dff.stats=dff.stats.__main__:main"]},
)
diff --git a/tests/stats/__init__.py b/tests/stats/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/stats/conftest.py b/tests/stats/conftest.py
new file mode 100644
index 000000000..4fbee4ceb
--- /dev/null
+++ b/tests/stats/conftest.py
@@ -0,0 +1,65 @@
+import pytest
+
+try:
+ from dff.stats import InMemoryLogExporter, InMemorySpanExporter, OTLPLogExporter, OTLPSpanExporter
+ from dff.stats.instrumentor import resource
+ from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
+ from opentelemetry.sdk.trace import TracerProvider
+ from opentelemetry.sdk._logs import LoggerProvider
+
+ opentelemetry_available = True
+except ImportError:
+ opentelemetry_available = False
+
+
+@pytest.fixture(scope="function")
+def tracer_exporter_and_provider():
+ if not opentelemetry_available:
+ pytest.skip("One of the Opentelemetry packages is missing.")
+ exporter = InMemorySpanExporter()
+ tracer_provider = TracerProvider(resource=resource)
+ tracer_provider.add_span_processor(BatchSpanProcessor(exporter, schedule_delay_millis=900))
+ yield exporter, tracer_provider
+
+
+@pytest.fixture(scope="function")
+def log_exporter_and_provider():
+ if not opentelemetry_available:
+ pytest.skip("One of the Opentelemetry packages is missing.")
+ exporter = InMemoryLogExporter()
+ logger_provider = LoggerProvider(resource=resource)
+ logger_provider.add_log_record_processor(BatchLogRecordProcessor(exporter, schedule_delay_millis=900))
+ yield exporter, logger_provider
+
+
+@pytest.fixture(scope="function")
+def otlp_trace_exp_provider():
+ if not opentelemetry_available:
+ pytest.skip("One of the Opentelemetry packages is missing.")
+ exporter = OTLPSpanExporter("grpc://localhost:4317", insecure=True)
+ tracer_provider = TracerProvider(resource=resource)
+ tracer_provider.add_span_processor(BatchSpanProcessor(exporter, schedule_delay_millis=900))
+ yield exporter, tracer_provider
+
+
+@pytest.fixture(scope="function")
+def otlp_log_exp_provider():
+ if not opentelemetry_available:
+ pytest.skip("One of the Opentelemetry packages is missing.")
+ exporter = OTLPLogExporter("grpc://localhost:4317", insecure=True)
+ logger_provider = LoggerProvider(resource=resource)
+ logger_provider.add_log_record_processor(BatchLogRecordProcessor(exporter, schedule_delay_millis=900))
+ yield exporter, logger_provider
+
+
+@pytest.fixture(scope="session") # test saving configs to zip
+def testing_cfg_dir(tmpdir_factory):
+ cfg_dir = tmpdir_factory.mktemp("cfg")
+ yield str(cfg_dir)
+
+
+@pytest.fixture(scope="function") # test saving to csv
+def testing_file(tmpdir_factory):
+ fn = tmpdir_factory.mktemp("data").join("stats.csv")
+ return str(fn)
diff --git a/tests/stats/test_defaults.py b/tests/stats/test_defaults.py
new file mode 100644
index 000000000..11c525723
--- /dev/null
+++ b/tests/stats/test_defaults.py
@@ -0,0 +1,52 @@
+import pytest
+
+try:
+ from wrapt import wrap_function_wrapper # noqa: F401
+ from dff.stats import OtelInstrumentor
+except ImportError:
+ pytest.skip(allow_module_level=True, reason="One of the Opentelemetry packages is missing.")
+
+from dff.script import Context
+from dff.pipeline.types import ExtraHandlerRuntimeInfo, ServiceRuntimeInfo
+from dff.stats import default_extractors
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+ "context,expected",
+ [
+ (Context(), set()),
+ (Context(labels={0: ("a", "b")}), {("flow", "a"), ("node", "b"), ("label", "a: b")}),
+ ],
+)
+async def test_get_current_label(context: Context, expected: set):
+ result = await default_extractors.get_current_label(context, None, {"component": {"path": "."}})
+ assert expected.intersection(set(result.items())) == expected
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+ "context,expected",
+ [
+ (Context(), set()),
+ (Context(labels={0: ("a", "b")}), {("flow", "a"), ("node", "b"), ("label", "a: b")}),
+ ],
+)
+async def test_otlp_integration(context, expected, tracer_exporter_and_provider, log_exporter_and_provider):
+ _, tracer_provider = tracer_exporter_and_provider
+ log_exporter, logger_provider = log_exporter_and_provider
+ instrumentor = OtelInstrumentor()
+ if instrumentor.is_instrumented_by_opentelemetry:
+ instrumentor.uninstrument()
+ instrumentor.instrument(logger_provider=logger_provider, tracer_provider=tracer_provider)
+ runtime_info = ExtraHandlerRuntimeInfo(
+ func=lambda x: x,
+ stage="BEFORE",
+ component=ServiceRuntimeInfo(
+ path=".", name=".", timeout=None, asynchronous=False, execution_state={".": "FINISHED"}
+ ),
+ )
+ _ = await default_extractors.get_current_label(context, None, runtime_info)
+ tracer_provider.force_flush()
+ logger_provider.force_flush()
+ assert len(log_exporter.get_finished_logs()) > 0
diff --git a/tests/stats/test_instrumentation.py b/tests/stats/test_instrumentation.py
new file mode 100644
index 000000000..67356bc08
--- /dev/null
+++ b/tests/stats/test_instrumentation.py
@@ -0,0 +1,38 @@
+import pytest
+
+try:
+ from dff.stats import default_extractors
+ from dff.stats import OtelInstrumentor
+ from opentelemetry.sdk.trace import TracerProvider
+ from opentelemetry.sdk._logs import LoggerProvider
+ from opentelemetry.sdk.metrics import MeterProvider
+ from opentelemetry.trace import get_tracer_provider
+ from opentelemetry.metrics import get_meter_provider
+ from opentelemetry._logs import get_logger_provider
+except ImportError:
+ pytest.skip(allow_module_level=True, reason="One of the Opentelemetry packages is missing.")
+
+
+def test_instrument_uninstrument():
+ instrumentor = OtelInstrumentor()
+ instrumentor.instrument()
+ assert hasattr(default_extractors.get_current_label, "__wrapped__")
+ assert hasattr(default_extractors.get_timing_before, "__wrapped__")
+ assert hasattr(default_extractors.get_timing_after, "__wrapped__")
+ instrumentor.uninstrument()
+ assert not hasattr(default_extractors.get_current_label, "__wrapped__")
+ assert not hasattr(default_extractors.get_timing_before, "__wrapped__")
+ assert not hasattr(default_extractors.get_timing_after, "__wrapped__")
+
+
+def test_keyword_arguments():
+ instrumentor = OtelInstrumentor()
+ assert instrumentor._meter_provider is get_meter_provider()
+ assert instrumentor._logger_provider is get_logger_provider()
+ assert instrumentor._tracer_provider is get_tracer_provider()
+ instrumentor.instrument(
+ tracer_provider=TracerProvider(), meter_provider=MeterProvider(), logger_provider=LoggerProvider()
+ )
+ assert instrumentor._meter_provider is not get_meter_provider()
+ assert instrumentor._logger_provider is not get_logger_provider()
+ assert instrumentor._tracer_provider is not get_tracer_provider()
diff --git a/tests/stats/test_main.py b/tests/stats/test_main.py
new file mode 100644
index 000000000..ac72fbf9b
--- /dev/null
+++ b/tests/stats/test_main.py
@@ -0,0 +1,151 @@
+import os
+import pytest
+from urllib import parse
+from zipfile import ZipFile
+from argparse import Namespace
+
+try:
+ import omegaconf # noqa: F401
+ from dff.stats.__main__ import main
+ from dff.stats.cli import DEFAULT_SUPERSET_URL, DASHBOARD_SLUG
+ from dff.stats.utils import get_superset_session, drop_superset_assets
+except ImportError:
+ pytest.skip(reason="`OmegaConf` dependency missing.", allow_module_level=True)
+
+from tests.context_storages.test_dbs import ping_localhost
+from tests.test_utils import get_path_from_tests_to_current_dir
+
+SUPERSET_ACTIVE = ping_localhost(8088)
+path_to_addon = get_path_from_tests_to_current_dir(__file__)
+
+
+def dashboard_display_test(args: Namespace, session, headers, base_url: str):
+ dashboard_url = parse.urljoin(base_url, f"/api/v1/dashboard/{DASHBOARD_SLUG}")
+ charts_url = parse.urljoin(base_url, "/api/v1/chart")
+ datasets_url = parse.urljoin(base_url, "/api/v1/dataset")
+ database_conn_url = parse.urljoin(base_url, "/api/v1/database/test_connection")
+ db_driver, db_user, db_password, db_host, db_port, db_name = (
+ getattr(args, "db.driver"),
+ getattr(args, "db.user"),
+ getattr(args, "db.password"),
+ getattr(args, "db.host"),
+ getattr(args, "db.port"),
+ getattr(args, "db.name"),
+ )
+ sqla_url = f"{db_driver}://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
+ database_data = {
+ "configuration_method": "sqlalchemy_form",
+ "database_name": "dff_database",
+ "driver": "string",
+ "engine": None,
+ "extra": "",
+ "impersonate_user": False,
+ "masked_encrypted_extra": "",
+ "parameters": {},
+ "server_cert": None,
+ "sqlalchemy_uri": sqla_url,
+ "ssh_tunnel": None,
+ }
+
+ database_res = session.post(database_conn_url, json=database_data, headers=headers)
+ assert database_res.status_code == 200
+ dashboard_res = session.get(dashboard_url, headers=headers)
+ assert dashboard_res.status_code == 200
+ dashboard_json = dashboard_res.json()
+ assert sorted(dashboard_json["result"]["charts"]) == [
+ "Flow visit ratio monitor",
+ "Node Visits",
+ "Node counts",
+ "Node visit ratio monitor",
+ "Node visits [cloud]",
+ "Node visits [ratio]",
+ "Node visits [sunburst]",
+ "Service load [users]",
+ "Table",
+ "Terminal labels",
+ "Transition counts",
+ "Transition layout",
+ "Transition ratio [chord]",
+ ]
+ assert dashboard_json["result"]["url"] == "/superset/dashboard/dff-stats/"
+ assert dashboard_json["result"]["dashboard_title"] == "DFF Stats"
+ datasets_result = session.get(datasets_url, headers=headers)
+ datasets_json = datasets_result.json()
+ assert datasets_json["count"] == 2
+ assert datasets_json["ids"] == [1, 2]
+ assert [item["id"] for item in datasets_json["result"]] == [1, 2]
+ assert sorted([item["table_name"] for item in datasets_json["result"]]) == [
+ "dff_final_nodes",
+ "dff_node_stats",
+ ]
+ charts_result = session.get(charts_url, headers=headers)
+ charts_json = charts_result.json()
+ assert charts_json["count"] == 14
+ assert sorted(charts_json["ids"]) == list(range(1, 15))
+ session.close()
+
+
+@pytest.mark.skipif(not SUPERSET_ACTIVE, reason="Superset server not active")
+@pytest.mark.parametrize(
+ ["args"],
+ [
+ (
+ Namespace(
+ **{
+ "outfile": "1.zip",
+ "db.driver": "clickhousedb+connect",
+ "db.host": "clickhouse",
+ "db.port": "8123",
+ "db.name": "test",
+ "db.table": "otel_logs",
+ "host": "localhost",
+ "port": "8088",
+ "file": f"tutorials/{path_to_addon}/example_config.yaml",
+ }
+ ),
+ ),
+ ],
+)
+@pytest.mark.docker
+def test_main(testing_cfg_dir, args):
+ args.__dict__.update(
+ {
+ "db.password": os.environ["CLICKHOUSE_PASSWORD"],
+ "username": os.environ["SUPERSET_USERNAME"],
+ "password": os.environ["SUPERSET_PASSWORD"],
+ "db.user": os.environ["CLICKHOUSE_USER"],
+ }
+ )
+ args.outfile = testing_cfg_dir + args.outfile
+ session, headers = get_superset_session(args, DEFAULT_SUPERSET_URL)
+ dashboard_url = parse.urljoin(DEFAULT_SUPERSET_URL, "/api/v1/dashboard/")
+
+ drop_superset_assets(session, headers, DEFAULT_SUPERSET_URL)
+ dashboard_result = session.get(dashboard_url, headers=headers)
+ dashboard_json = dashboard_result.json()
+ assert dashboard_json["count"] == 0
+
+ main(args)
+ dashboard_display_test(args, session, headers, base_url=DEFAULT_SUPERSET_URL)
+ assert os.path.exists(args.outfile)
+ assert os.path.isfile(args.outfile)
+ assert os.path.getsize(args.outfile) > 2200
+ with ZipFile(args.outfile) as file:
+ file.extractall(testing_cfg_dir)
+ database = omegaconf.OmegaConf.load(os.path.join(testing_cfg_dir, "superset_dashboard/databases/dff_database.yaml"))
+ sqlalchemy_uri = omegaconf.OmegaConf.select(database, "sqlalchemy_uri")
+ arg_vars = vars(args)
+ driver, user, host, port, name = (
+ arg_vars["db.driver"],
+ arg_vars["db.user"],
+ arg_vars["db.host"],
+ arg_vars["db.port"],
+ arg_vars["db.name"],
+ )
+ assert sqlalchemy_uri == f"{driver}://{user}:XXXXXXXXXX@{host}:{port}/{name}"
+
+
+@pytest.mark.parametrize(["cmd"], [("dff.stats -h",), ("dff.stats --help",)])
+def test_help(cmd):
+ res = os.system(cmd)
+ assert res == 0
diff --git a/tests/stats/test_patch.py b/tests/stats/test_patch.py
new file mode 100644
index 000000000..284ea0b69
--- /dev/null
+++ b/tests/stats/test_patch.py
@@ -0,0 +1,19 @@
+import pytest
+
+try:
+ from dff import stats # noqa: F401
+ from opentelemetry.proto.common.v1.common_pb2 import AnyValue
+ from opentelemetry.exporter.otlp.proto.grpc.exporter import _translate_value
+except ImportError:
+ pytest.skip(allow_module_level=True, reason="One of the Opentelemetry packages is missing.")
+
+
+@pytest.mark.parametrize(
+ ["value", "expected_field"], [(1, "int_value"), ({"a": "b"}, "kvlist_value"), (None, "string_value")]
+)
+def test_body_translation(value, expected_field):
+ assert _translate_value.__wrapped__.__name__ == "_translate_value"
+ translated_value = _translate_value(value)
+ assert isinstance(translated_value, AnyValue)
+ assert translated_value.IsInitialized()
+ assert getattr(translated_value, expected_field, None) is not None
diff --git a/tests/stats/test_tutorials.py b/tests/stats/test_tutorials.py
new file mode 100644
index 000000000..969dc6bf5
--- /dev/null
+++ b/tests/stats/test_tutorials.py
@@ -0,0 +1,90 @@
+import os
+import importlib
+import pytest
+import asyncio
+
+from tests.test_utils import get_path_from_tests_to_current_dir
+from tests.context_storages.test_dbs import ping_localhost
+from dff.utils.testing.common import check_happy_path
+from dff.utils.testing.toy_script import HAPPY_PATH
+
+try:
+ from aiochclient import ChClient
+ from httpx import AsyncClient
+ from dff import stats # noqa: F401
+except ImportError:
+ pytest.skip(allow_module_level=True, reason="There are dependencies missing.")
+
+
+dot_path_to_addon = get_path_from_tests_to_current_dir(__file__)
+
+
+COLLECTOR_AVAILABLE = ping_localhost(4317)
+CLICKHOUSE_AVAILABLE = ping_localhost(8123)
+CLICKHOUSE_USER = os.getenv("CLICKHOUSE_USER")
+CLICKHOUSE_PASSWORD = os.getenv("CLICKHOUSE_PASSWORD")
+CLICKHOUSE_DB = os.getenv("CLICKHOUSE_DB")
+
+
+@pytest.mark.skipif(not CLICKHOUSE_AVAILABLE, reason="Clickhouse unavailable.")
+@pytest.mark.skipif(not COLLECTOR_AVAILABLE, reason="OTLP collector unavailable.")
+@pytest.mark.skipif(
+ not all([CLICKHOUSE_USER, CLICKHOUSE_PASSWORD, CLICKHOUSE_DB]), reason="Clickhouse credentials missing"
+)
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+ ["example_module_name", "expected_logs"],
+ [
+ ("1_extractor_functions", 10),
+ ("2_pipeline_integration", 35),
+ ],
+)
+@pytest.mark.docker
+async def test_examples_ch(example_module_name: str, expected_logs, otlp_log_exp_provider, otlp_trace_exp_provider):
+ module = importlib.import_module(f"tutorials.{dot_path_to_addon}.{example_module_name}")
+ _, tracer_provider = otlp_trace_exp_provider
+ _, logger_provider = otlp_log_exp_provider
+ http_client = AsyncClient()
+ table = "otel_logs"
+ ch_client = ChClient(http_client, user=CLICKHOUSE_USER, password=CLICKHOUSE_PASSWORD, database=CLICKHOUSE_DB)
+
+ try:
+ await ch_client.execute(f"TRUNCATE {table}")
+ pipeline = module.pipeline
+ module.dff_instrumentor.uninstrument()
+ module.dff_instrumentor.instrument(logger_provider=logger_provider, tracer_provider=tracer_provider)
+ check_happy_path(pipeline, HAPPY_PATH)
+ await asyncio.sleep(1)
+ count = await ch_client.fetchval(f"SELECT COUNT (*) FROM {table}")
+ assert count == expected_logs
+
+ except Exception as exc:
+ raise Exception(f"model_name=tutorials.{dot_path_to_addon}.{example_module_name}") from exc
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+ ["example_module_name", "expected_logs"],
+ [
+ ("1_extractor_functions", 10),
+ ("2_pipeline_integration", 35),
+ ],
+)
+async def test_examples_memory(
+ example_module_name: str, expected_logs, tracer_exporter_and_provider, log_exporter_and_provider
+):
+ module = importlib.import_module(f"tutorials.{dot_path_to_addon}.{example_module_name}")
+ _, tracer_provider = tracer_exporter_and_provider
+ log_exporter, logger_provider = log_exporter_and_provider
+ try:
+ pipeline = module.pipeline
+ module.dff_instrumentor.uninstrument()
+ module.dff_instrumentor.instrument(logger_provider=logger_provider, tracer_provider=tracer_provider)
+ check_happy_path(pipeline, HAPPY_PATH)
+ tracer_provider.force_flush()
+ logger_provider.force_flush()
+ await asyncio.sleep(1)
+ assert len(log_exporter.get_finished_logs()) == expected_logs
+
+ except Exception as exc:
+ raise Exception(f"model_name=tutorials.{dot_path_to_addon}.{example_module_name}") from exc
diff --git a/tutorials/stats/1_extractor_functions.py b/tutorials/stats/1_extractor_functions.py
new file mode 100644
index 000000000..20f371ba6
--- /dev/null
+++ b/tutorials/stats/1_extractor_functions.py
@@ -0,0 +1,111 @@
+# %% [markdown]
+"""
+# 1. Extractor Functions
+
+The following example covers the basics of using the `stats` module.
+
+Statistics are collected from pipeline services by extractor functions
+that report the state of one or more pipeline components. The `stats` module
+provides several default extractors, but users are free to define their own
+extractor functions.
+
+It is required that the extractors have the following uniform signature:
+the expected arguments are always `Context`, `Pipeline`, and `ExtraHandlerRuntimeInfo`,
+while the expected return value is an arbitrary `dict` or a `None`. It is a preferred practice
+to define extractors as asynchronous functions.
+
+The output of these functions will be captured by an OpenTelemetry instrumentor and directed to
+the Opentelemetry collector server which in its turn batches and persists data
+to Clickhouse or other OLAP storages.
+For more information on OpenTelemetry instrumentation,
+refer to the body of this tutorial as well as [OpenTelemetry documentation](
+https://opentelemetry.io/docs/instrumentation/python/manual/
+).
+
+"""
+
+# %pip install dff[stats]
+
+# %%
+import asyncio
+
+from dff.script import Context
+from dff.pipeline import Pipeline, ACTOR, Service, ExtraHandlerRuntimeInfo, to_service
+from dff.utils.testing.toy_script import TOY_SCRIPT, HAPPY_PATH
+from dff.stats import OtelInstrumentor, default_extractors
+from dff.utils.testing import is_interactive_mode, check_happy_path
+
+
+# %% [markdown]
+"""
+The cells below configure log export with the help of OTLP instrumentation.
+
+* The initial step is to configure the export destination.
+`from_url` method of the `OtelInstrumentor` class simplifies this task
+allowing you to only pass the url of the OTLP Collector server.
+
+* Alternatively, you can use the utility functions provided by the `stats` module:
+`set_logger_destination`, `set_tracer_destination`, or `set_meter_destination`. These accept
+an appropriate Opentelemetry exporter instance and bind it to provider classes.
+
+* Nextly, the `OtelInstrumentor` class should be constructed to log the output
+of extractor functions. Custom extractors can be decorated with the `OtelInstrumentor` instance.
+Default extractors are instrumented by calling the `instrument` method.
+
+* The entirety of the process is illustrated in the example below.
+
+"""
+
+
+# %%
+dff_instrumentor = OtelInstrumentor.from_url("grpc://localhost:4317")
+dff_instrumentor.instrument()
+
+# %% [markdown]
+"""
+The following cell shows a custom extractor function. The data obtained from
+the context and the runtime information gets shaped as a dict and returned
+from the function body. The `dff_instrumentor` decorator then ensures
+that the output is logged by OpenTelemetry.
+
+"""
+
+
+# %%
+# decorated by an OTLP Instrumentor instance
+@dff_instrumentor
+async def get_service_state(ctx: Context, _, info: ExtraHandlerRuntimeInfo):
+ # extract the execution state of a target service
+ data = {
+ "execution_state": info.component.execution_state,
+ }
+ # return the state as an arbitrary dict for further logging
+ return data
+
+
+# %%
+# configure `get_service_state` to run after the `heavy_service`
+@to_service(after_handler=[get_service_state])
+async def heavy_service(ctx: Context):
+ _ = ctx # get something from ctx if needed
+ await asyncio.sleep(0.02)
+
+
+# %%
+pipeline = Pipeline.from_dict(
+ {
+ "script": TOY_SCRIPT,
+ "start_label": ("greeting_flow", "start_node"),
+ "fallback_label": ("greeting_flow", "fallback_node"),
+ "components": [
+ heavy_service,
+ Service(handler=ACTOR, after_handler=[default_extractors.get_current_label]),
+ ],
+ }
+)
+
+
+if __name__ == "__main__":
+ check_happy_path(pipeline, HAPPY_PATH)
+ if is_interactive_mode():
+ pipeline.run()
diff --git a/tutorials/stats/2_pipeline_integration.py b/tutorials/stats/2_pipeline_integration.py
new file mode 100644
index 000000000..de81a3593
--- /dev/null
+++ b/tutorials/stats/2_pipeline_integration.py
@@ -0,0 +1,110 @@
+# %% [markdown]
+"""
+# 2. Pipeline Integration
+
+In the DFF ecosystem, extractor functions act as regular extra handlers (
+[see the pipeline module documentation](%doclink(tutorial,pipeline.7_extra_handlers_basic))
+).
+Hence, you can decorate any part of your pipeline, including services,
+service groups, and the pipeline as a whole, to obtain the statistics
+specific to that component. Some examples of this functionality
+are showcased in this tutorial.
+
+"""
+
+# %pip install dff[stats]
+
+# %%
+import asyncio
+
+from dff.script import Context
+from dff.pipeline import (
+ Pipeline,
+ ACTOR,
+ Service,
+ ExtraHandlerRuntimeInfo,
+ ServiceGroup,
+ GlobalExtraHandlerType,
+)
+from dff.utils.testing.toy_script import TOY_SCRIPT, HAPPY_PATH
+from dff.stats import OtelInstrumentor, set_logger_destination, set_tracer_destination
+from dff.stats import OTLPLogExporter, OTLPSpanExporter
+from dff.stats import default_extractors
+from dff.utils.testing import is_interactive_mode, check_happy_path
+
+# %%
+set_logger_destination(OTLPLogExporter("grpc://localhost:4317", insecure=True))
+set_tracer_destination(OTLPSpanExporter("grpc://localhost:4317", insecure=True))
+dff_instrumentor = OtelInstrumentor()
+dff_instrumentor.instrument()
+
+
+# example extractor function
+@dff_instrumentor
+async def get_service_state(ctx: Context, _, info: ExtraHandlerRuntimeInfo):
+ # extract execution state of service from info
+ data = {
+ "execution_state": info.component.execution_state,
+ }
+ # return a record to save into connected database
+ return data
+
+
+# %%
+# example service
+async def heavy_service(ctx: Context):
+ _ = ctx # get something from ctx if needed
+ await asyncio.sleep(0.02)
+
+
+# %% [markdown]
+"""
+
+The following pipeline definition shows the many ways in which you can use
+extractor functions. The functions are used to obtain statistics from the respective components:
+
+* A service group of two `heavy_service` instances.
+* An `Actor` service.
+* The pipeline as a whole.
+
+As is the case with the regular extra handler functions, you can wire the extractors
+to run either before or after the target service. As a result, you can compare
+the pre-service and post-service states of the context to measure the performance
+of various components, etc.
+
+"""
+# %%
+pipeline = Pipeline.from_dict(
+ {
+ "script": TOY_SCRIPT,
+ "start_label": ("greeting_flow", "start_node"),
+ "fallback_label": ("greeting_flow", "fallback_node"),
+ "components": [
+ ServiceGroup(
+ before_handler=[default_extractors.get_timing_before],
+ after_handler=[
+ get_service_state,
+ default_extractors.get_timing_after,
+ ],
+ components=[{"handler": heavy_service}, {"handler": heavy_service}],
+ ),
+ Service(
+ handler=ACTOR,
+ before_handler=[default_extractors.get_timing_before],
+ after_handler=[
+ get_service_state,
+ default_extractors.get_timing_after,
+ default_extractors.get_current_label,
+ ],
+ ),
+ ],
+ }
+)
+pipeline.add_global_handler(GlobalExtraHandlerType.BEFORE_ALL, default_extractors.get_timing_before)
+pipeline.add_global_handler(GlobalExtraHandlerType.AFTER_ALL, default_extractors.get_timing_after)
+pipeline.add_global_handler(GlobalExtraHandlerType.AFTER_ALL, get_service_state)
+
+if __name__ == "__main__":
+ check_happy_path(pipeline, HAPPY_PATH)
+ if is_interactive_mode():
+ pipeline.run()
diff --git a/tutorials/stats/example_config.yaml b/tutorials/stats/example_config.yaml
new file mode 100644
index 000000000..f01db7d35
--- /dev/null
+++ b/tutorials/stats/example_config.yaml
@@ -0,0 +1,7 @@
+db:
+ driver: clickhousedb+connect
+ name: test
+ user: username
+ host: clickhouse
+ port: 8123
+ table: otel_logs
\ No newline at end of file