diff --git a/.env_file b/.env_file index d943f71b0..15b7a209a 100644 --- a/.env_file +++ b/.env_file @@ -14,3 +14,9 @@ MON_PORT=8765 YDB_ENDPOINT=grpc://localhost:2136 YDB_DATABASE=/local YDB_ANONYMOUS_CREDENTIALS=1 +CLICKHOUSE_DB=test +CLICKHOUSE_USER=username +CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 +CLICKHOUSE_PASSWORD=pass +SUPERSET_USERNAME=superset +SUPERSET_PASSWORD=superset \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index 7707509be..f7a91262a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,6 @@ + * text=auto +*.png binary +*.jpg binary *.py text eol=lf -*.sh text eol=lf -* text working-tree-encoding=UTF-8 \ No newline at end of file +*.sh text eol=lf \ No newline at end of file diff --git a/.github/workflows/update_dashboard.yml b/.github/workflows/update_dashboard.yml new file mode 100644 index 000000000..6beeadd62 --- /dev/null +++ b/.github/workflows/update_dashboard.yml @@ -0,0 +1,45 @@ +name: update_dashboard + +on: + push: + branches: + - 'dev' + paths: + - 'dff/utils/docker/dockerfile_stats' + - 'dff/utils/docker/entrypoint_stats.sh' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + +jobs: + build_and_publish_dashboard: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Log in to container registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract Docker metadata + uses: docker/metadata-action@v4 + with: + images: ghcr.io/${{ github.repository }}/superset_df_dashboard + tags: | + type=ref,event=branch + type=semver,event={{version}} + + - name: Build and upload image + uses: docker/build-push-action@v4 + with: + file: dff/utils/docker/dockerfile_stats + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in 
index 8bd17ef6f..2bf8bfe27 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ include CONTRIBUTING.md include LICENSE include README.md +graft dff/config/superset_dashboard include dff/context_storages/protocols.json exclude makefile diff --git a/dff/config/README.md b/dff/config/README.md new file mode 100644 index 000000000..f060b14d6 --- /dev/null +++ b/dff/config/README.md @@ -0,0 +1,10 @@ +# Superset dashboard config + +## Description + +This directory provides yaml files for Superset dashboard configuration. +The files inside are not supposed to be edited manually for lest of compatibility breaks. +Placeholders inside the files will be filled automatically when you use the +`dff.stats` CLI command to generate a configuration archive. + +Use `dff.stats -h` for more info. \ No newline at end of file diff --git a/dff/config/superset_dashboard/charts/Flow_visit_ratio_monitor_1.yaml b/dff/config/superset_dashboard/charts/Flow_visit_ratio_monitor_1.yaml new file mode 100644 index 000000000..4fb4c2124 --- /dev/null +++ b/dff/config/superset_dashboard/charts/Flow_visit_ratio_monitor_1.yaml @@ -0,0 +1,78 @@ +slice_name: Flow visit ratio monitor +description: null +certified_by: null +certification_details: null +viz_type: echarts_timeseries_bar +params: + datasource: 2__table + viz_type: echarts_timeseries_bar + slice_id: 1 + granularity_sqla: start_time + time_grain_sqla: null + time_range: No filter + metrics: + - aggregate: COUNT + column: + advanced_data_type: null + certification_details: null + certified_by: null + column_name: context_id + description: null + expression: null + filterable: true + groupby: true + id: 1 + is_certified: false + is_dttm: false + python_date_format: null + type: STRING + type_generic: 1 + verbose_name: null + warning_markdown: null + expressionType: SIMPLE + hasCustomLabel: false + isNew: false + label: COUNT(context_id) + optionName: metric_waxxrrnkwwm_zudaex5z8bh + sqlExpression: null + groupby: + - flow_label + 
contributionMode: column + adhoc_filters: [] + order_desc: true + row_limit: 10000 + truncate_metric: true + show_empty_columns: true + comparison_type: values + annotation_layers: [] + forecastPeriods: 10 + forecastInterval: 0.8 + orientation: vertical + x_axis_title_margin: 15 + y_axis_title: '' + y_axis_title_margin: 50 + y_axis_title_position: Left + color_scheme: supersetColors + stack: true + only_total: true + zoomable: true + show_legend: true + legendType: scroll + legendOrientation: top + x_axis_time_format: smart_date + y_axis_format: SMART_NUMBER + y_axis_bounds: + - null + - null + rich_tooltip: true + tooltipTimeFormat: smart_date + extra_form_data: {} + dashboards: + - 1 +query_context: '{"datasource":{"id":2,"type":"table"},"force":false,"queries":[{"time_range":"No + filter","granularity":"start_time","filters":[],"extras":{"time_grain_sqla":null,"having":"","where":""},"applied_time_extras":{},"columns":["flow_label"],"metrics":[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_waxxrrnkwwm_zudaex5z8bh","sqlExpression":null}],"orderby":[[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_waxxrrnkwwm_zudaex5z8bh","sqlExpression":null},false]
],"annotation_layers":[],"row_limit":10000,"series_columns":["flow_label"],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"is_timeseries":true,"time_offsets":[],"post_processing":[{"operation":"pivot","options":{"index":["__timestamp"],"columns":["flow_label"],"aggregates":{"COUNT(context_id)":{"operator":"mean"}},"drop_missing_columns":false}},{"operation":"rename","options":{"columns":{"COUNT(context_id)":null},"level":0,"inplace":true}},{"operation":"contribution","options":{"orientation":"column"}},{"operation":"flatten"}]}],"form_data":{"datasource":"2__table","viz_type":"echarts_timeseries_bar","slice_id":1,"granularity_sqla":"start_time","time_grain_sqla":null,"time_range":"No + filter","metrics":[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_waxxrrnkwwm_zudaex5z8bh","sqlExpression":null}],"groupby":["flow_label"],"contributionMode":"column","adhoc_filters":[],"order_desc":true,"row_limit":10000,"truncate_metric":true,"show_empty_columns":true,"comparison_type":"values","annotation_layers":[],"forecastPeriods":10,"forecastInterval":0.8,"orientation":"vertical","x_axis_title_margin":15,"y_axis_title":"","y_axis_title_margin":50,"y_axis_title_position":"Left","color_scheme":"supersetColors","stack":true,"only_total":true,"zoomable":true,"show_legend":true,"legendType":"scroll","legendOrientation":"top","x_axis_time_format":"smart_date","y_axis_format":"SMART_NUMBER","y_axis_bounds":[null,null],"rich_tooltip":true,"tooltipTimeFormat":"smart_date","extra_form_data":{},"dashboards":[1],"force":false,"result_for
mat":"json","result_type":"full"},"result_format":"json","result_type":"full"}' +cache_timeout: null +uuid: ba02528b-184b-4304-b027-f2b7d9011ab0 +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/charts/Node_Visits_2.yaml b/dff/config/superset_dashboard/charts/Node_Visits_2.yaml new file mode 100644 index 000000000..802288e28 --- /dev/null +++ b/dff/config/superset_dashboard/charts/Node_Visits_2.yaml @@ -0,0 +1,59 @@ +slice_name: Node Visits +description: null +certified_by: null +certification_details: null +viz_type: dist_bar +params: + adhoc_filters: [] + bottom_margin: auto + color_scheme: supersetColors + columns: + - label + datasource: 3__table + extra_form_data: {} + granularity_sqla: start_time + groupby: + - request_id + metrics: + - aggregate: COUNT + column: + advanced_data_type: null + certification_details: null + certified_by: null + column_name: context_id + description: null + expression: null + filterable: true + groupby: true + id: 20 + is_certified: false + is_dttm: false + python_date_format: null + type: STRING + type_generic: 1 + verbose_name: null + warning_markdown: null + expressionType: SIMPLE + hasCustomLabel: false + isNew: false + label: COUNT(context_id) + optionName: metric_l2mle87zvnb_nfqzdxmig1d + sqlExpression: null + order_desc: true + rich_tooltip: true + row_limit: 10000 + show_legend: true + time_range: No filter + viz_type: dist_bar + x_axis_label: History id + x_ticks_layout: auto + y_axis_bounds: + - null + - null + y_axis_format: SMART_NUMBER + y_axis_label: Node visits +query_context: null +cache_timeout: null +uuid: 44f4ab9d-5072-4926-a6ed-8615fb81b3d0 +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/charts/Node_counts_3.yaml b/dff/config/superset_dashboard/charts/Node_counts_3.yaml new file mode 100644 index 000000000..8cf476c2f --- /dev/null +++ 
b/dff/config/superset_dashboard/charts/Node_counts_3.yaml @@ -0,0 +1,59 @@ +slice_name: Node counts +description: null +certified_by: null +certification_details: null +viz_type: dist_bar +params: + adhoc_filters: [] + bar_stacked: true + bottom_margin: auto + color_scheme: supersetColors + columns: + - flow_label + datasource: 1__table + extra_form_data: {} + granularity_sqla: start_time + groupby: + - label + metrics: + - aggregate: COUNT_DISTINCT + column: + advanced_data_type: null + certification_details: null + certified_by: null + column_name: context_id + description: null + expression: null + filterable: true + groupby: true + id: 1 + is_certified: false + is_dttm: false + python_date_format: null + type: STRING + type_generic: 1 + verbose_name: null + warning_markdown: null + expressionType: SIMPLE + hasCustomLabel: false + isNew: false + label: COUNT_DISTINCT(context_id) + optionName: metric_axee7fzlpu_upud0bdjv6 + sqlExpression: null + order_bars: true + order_desc: true + rich_tooltip: true + row_limit: 10000 + show_legend: true + time_range: No filter + viz_type: dist_bar + x_ticks_layout: auto + y_axis_bounds: + - null + - null + y_axis_format: SMART_NUMBER +query_context: null +cache_timeout: null +uuid: 0c47c7b5-f500-46cb-97e3-9ebb637f0c8a +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/charts/Node_visit_ratio_monitor_4.yaml b/dff/config/superset_dashboard/charts/Node_visit_ratio_monitor_4.yaml new file mode 100644 index 000000000..93c1bf239 --- /dev/null +++ b/dff/config/superset_dashboard/charts/Node_visit_ratio_monitor_4.yaml @@ -0,0 +1,86 @@ +slice_name: Node visit ratio monitor +description: null +certified_by: null +certification_details: null +viz_type: echarts_timeseries_bar +params: + datasource: 2__table + viz_type: echarts_timeseries_bar + slice_id: 4 + granularity_sqla: start_time + time_grain_sqla: null + time_range: No filter + metrics: + - aggregate: COUNT + column: + 
advanced_data_type: null + certification_details: null + certified_by: null + column_name: context_id + description: null + expression: null + filterable: true + groupby: true + id: 1 + is_certified: false + is_dttm: false + python_date_format: null + type: STRING + type_generic: 1 + verbose_name: null + warning_markdown: null + expressionType: SIMPLE + hasCustomLabel: false + isNew: false + label: COUNT(context_id) + optionName: metric_9yefk3wj2g_5wbp61n0pyr + sqlExpression: null + groupby: + - flow_label + - node_label + contributionMode: column + adhoc_filters: [] + order_desc: true + row_limit: 10000 + truncate_metric: true + show_empty_columns: true + comparison_type: values + annotation_layers: [] + forecastPeriods: 10 + forecastInterval: 0.8 + orientation: vertical + x_axis_title: Datetime + x_axis_title_margin: 30 + y_axis_title: Node visit ratio + y_axis_title_margin: 50 + y_axis_title_position: Left + color_scheme: supersetColors + stack: true + only_total: true + zoomable: true + show_legend: true + legendType: scroll + legendOrientation: top + x_axis_time_format: smart_date + xAxisLabelRotation: 45 + y_axis_format: SMART_NUMBER + logAxis: false + minorSplitLine: false + truncateYAxis: false + y_axis_bounds: + - null + - null + rich_tooltip: true + tooltipSortByMetric: true + tooltipTimeFormat: smart_date + extra_form_data: {} + dashboards: + - 1 +query_context: '{"datasource":{"id":2,"type":"table"},"force":false,"queries":[{"time_range":"No + 
filter","granularity":"start_time","filters":[],"extras":{"time_grain_sqla":null,"having":"","where":""},"applied_time_extras":{},"columns":["flow_label","node_label"],"metrics":[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_9yefk3wj2g_5wbp61n0pyr","sqlExpression":null}],"orderby":[[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_9yefk3wj2g_5wbp61n0pyr","sqlExpression":null},false]],"annotation_layers":[],"row_limit":10000,"series_columns":["flow_label","node_label"],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"is_timeseries":true,"time_offsets":[],"post_processing":[{"operation":"pivot","options":{"index":["__timestamp"],"columns":["flow_label","node_label"],"aggregates":{"COUNT(context_id)":{"operator":"mean"}},"drop_missing_columns":false}},{"operation":"rename","options":{"columns":{"COUNT(context_id)":null},"level":0,"inplace":true}},{"operation":"contribution","options":{"orientation":"column"}},{"operation":"flatten"}]}],"form_data":{"datasource":"2__table","viz_type":"echarts_timeseries_bar","slice_id":4,"granularity_sqla":"start_time","time_grain_sqla":null,"time_range":"No + 
filter","metrics":[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_9yefk3wj2g_5wbp61n0pyr","sqlExpression":null}],"groupby":["flow_label","node_label"],"contributionMode":"column","adhoc_filters":[],"order_desc":true,"row_limit":10000,"truncate_metric":true,"show_empty_columns":true,"comparison_type":"values","annotation_layers":[],"forecastPeriods":10,"forecastInterval":0.8,"orientation":"vertical","x_axis_title":"Datetime","x_axis_title_margin":30,"y_axis_title":"Node + visit ratio","y_axis_title_margin":50,"y_axis_title_position":"Left","color_scheme":"supersetColors","stack":true,"only_total":true,"zoomable":true,"show_legend":true,"legendType":"scroll","legendOrientation":"top","x_axis_time_format":"smart_date","xAxisLabelRotation":45,"y_axis_format":"SMART_NUMBER","logAxis":false,"minorSplitLine":false,"truncateYAxis":false,"y_axis_bounds":[null,null],"rich_tooltip":true,"tooltipSortByMetric":true,"tooltipTimeFormat":"smart_date","extra_form_data":{},"dashboards":[1],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}' +cache_timeout: null +uuid: 6fafe59c-0fec-4cd8-a8b3-c0bfaffb2135 +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/charts/Node_visits_cloud_5.yaml b/dff/config/superset_dashboard/charts/Node_visits_cloud_5.yaml new file mode 100644 index 000000000..334bc85c7 --- /dev/null +++ b/dff/config/superset_dashboard/charts/Node_visits_cloud_5.yaml @@ -0,0 +1,48 @@ +slice_name: Node visits [cloud] +description: null +certified_by: null 
+certification_details: null +viz_type: word_cloud +params: + adhoc_filters: [] + color_scheme: supersetColors + datasource: 1__table + extra_form_data: {} + granularity_sqla: start_time + metric: + aggregate: COUNT + column: + advanced_data_type: null + certification_details: null + certified_by: null + column_name: context_id + description: null + expression: null + filterable: true + groupby: true + id: 1 + is_certified: false + is_dttm: false + python_date_format: null + type: STRING + type_generic: 1 + verbose_name: null + warning_markdown: null + expressionType: SIMPLE + hasCustomLabel: false + isNew: false + label: COUNT(context_id) + optionName: metric_mmbslhy6cnd_6zv1lh26whx + sqlExpression: null + rotation: flat + row_limit: 500 + series: label + size_from: 10 + size_to: 80 + time_range: No filter + viz_type: word_cloud +query_context: null +cache_timeout: null +uuid: b25b4292-ff21-4164-98ac-b1cba95e2994 +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/charts/Node_visits_ratio_6.yaml b/dff/config/superset_dashboard/charts/Node_visits_ratio_6.yaml new file mode 100644 index 000000000..f3d71f8e9 --- /dev/null +++ b/dff/config/superset_dashboard/charts/Node_visits_ratio_6.yaml @@ -0,0 +1,65 @@ +slice_name: Node visits [ratio] +description: null +certified_by: null +certification_details: null +viz_type: pie +params: + adhoc_filters: [] + color_scheme: supersetColors + datasource: 1__table + date_format: smart_date + donut: true + extra_form_data: {} + granularity_sqla: start_time + groupby: + - flow_label + - node_label + innerRadius: 36 + label_line: false + label_type: key + labels_outside: true + legendMargin: 100 + legendOrientation: top + legendType: plain + metric: + aggregate: COUNT + column: + advanced_data_type: null + certification_details: null + certified_by: null + column_name: context_id + description: null + expression: null + filterable: true + groupby: true + id: 1 + is_certified: 
false + is_dttm: false + python_date_format: null + type: STRING + type_generic: 1 + verbose_name: null + warning_markdown: null + expressionType: SIMPLE + hasCustomLabel: false + isNew: false + label: COUNT(context_id) + optionName: metric_lk827d91wws_mq94n624y6 + sqlExpression: null + number_format: SMART_NUMBER + outerRadius: 70 + row_limit: 100 + show_labels: true + show_labels_threshold: 5 + show_legend: true + show_total: true + sort_by_metric: true + time_range: No filter + viz_type: pie +query_context: '{"datasource":{"id":2,"type":"table"},"force":false,"queries":[{"time_range":"No + filter","granularity":"start_time","filters":[],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":["flow_label","node_label"],"metrics":[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_lk827d91wws_mq94n624y6","sqlExpression":null}],"orderby":[[{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_lk827d91wws_mq94n624y6","sqlExpression":null},false]],"annotation_layers":[],"row_limit":100,"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{}}],"form_data":{"adhoc_filters":[],"color_scheme":"supersetColors","datasou
rce":"2__table","date_format":"smart_date","donut":true,"extra_form_data":{},"granularity_sqla":"start_time","groupby":["flow_label","node_label"],"innerRadius":36,"label_line":false,"label_type":"key","labels_outside":true,"legendMargin":100,"legendOrientation":"top","legendType":"plain","metric":{"aggregate":"COUNT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":1,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"STRING","type_generic":1,"verbose_name":null,"warning_markdown":null},"expressionType":"SIMPLE","hasCustomLabel":false,"isNew":false,"label":"COUNT(context_id)","optionName":"metric_lk827d91wws_mq94n624y6","sqlExpression":null},"number_format":"SMART_NUMBER","outerRadius":70,"row_limit":100,"show_labels":true,"show_labels_threshold":5,"show_legend":true,"show_total":true,"slice_id":6,"sort_by_metric":true,"time_range":"No + filter","viz_type":"pie","force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}' +cache_timeout: null +uuid: f9fb7893-3533-4519-bbc4-1f4853f380e1 +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/charts/Node_visits_sunburst_7.yaml b/dff/config/superset_dashboard/charts/Node_visits_sunburst_7.yaml new file mode 100644 index 000000000..b3293a635 --- /dev/null +++ b/dff/config/superset_dashboard/charts/Node_visits_sunburst_7.yaml @@ -0,0 +1,69 @@ +slice_name: Node visits [sunburst] +description: null +certified_by: null +certification_details: null +viz_type: sunburst +params: + adhoc_filters: + - clause: WHERE + comparator: null + expressionType: SIMPLE + filterOptionName: filter_82x0yx6fkpv_6h8a2190nmh + isExtra: false + isNew: false + operator: IS NOT NULL + operatorId: IS_NOT_NULL + sqlExpression: null + subject: flow_label + - clause: WHERE + comparator: null + 
expressionType: SIMPLE + filterOptionName: filter_653tn7jqmox_x65gwtz29gc + isExtra: false + isNew: false + operator: IS NOT NULL + operatorId: IS_NOT_NULL + sqlExpression: null + subject: node_label + color_scheme: supersetColors + datasource: 3__table + extra_form_data: {} + granularity_sqla: start_time + groupby: + - flow_label + - node_label + linear_color_scheme: superset_seq_1 + metric: + aggregate: COUNT + column: + advanced_data_type: null + certification_details: null + certified_by: null + column_name: context_id + description: null + expression: null + filterable: true + groupby: true + id: 1 + is_certified: false + is_dttm: false + python_date_format: null + type: STRING + type_generic: 1 + verbose_name: null + warning_markdown: null + expressionType: SIMPLE + hasCustomLabel: false + isNew: false + label: COUNT(context_id) + optionName: metric_7mo9cnw40ph_rzxhx01jm0c + sqlExpression: null + row_limit: 10000 + slice_id: 8 + time_range: No filter + viz_type: sunburst +query_context: null +cache_timeout: null +uuid: e955f48c-824c-423c-a11c-5b5dca162927 +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/charts/Service_load_max_dialogue_length_8.yaml b/dff/config/superset_dashboard/charts/Service_load_max_dialogue_length_8.yaml new file mode 100644 index 000000000..1e087769a --- /dev/null +++ b/dff/config/superset_dashboard/charts/Service_load_max_dialogue_length_8.yaml @@ -0,0 +1,83 @@ +slice_name: Service load [max dialogue length] +description: null +certified_by: null +certification_details: null +viz_type: echarts_timeseries_step +params: + datasource: 2__table + viz_type: echarts_timeseries_step + slice_id: 8 + granularity_sqla: start_time + time_grain_sqla: null + time_range: No filter + metrics: + - aggregate: null + column: null + datasourceWarning: false + expressionType: SQL + hasCustomLabel: false + label: AVG(CAST(request_id AS Int16)) + optionName: metric_8h0zfurdcy_tzo5q0tuczi + 
sqlExpression: AVG(CAST(request_id AS Int16)) + - aggregate: null + column: null + datasourceWarning: false + expressionType: SQL + hasCustomLabel: false + label: MAX(CAST(request_id AS Int16)) + optionName: metric_wz5zx1kqqc_u42eslame2 + sqlExpression: MAX(CAST(request_id AS Int16)) + groupby: [] + adhoc_filters: [] + order_desc: true + row_limit: 10000 + truncate_metric: true + show_empty_columns: true + comparison_type: values + annotation_layers: [] + forecastPeriods: 10 + forecastInterval: 0.8 + x_axis_title_margin: 15 + y_axis_title: AVG/MAX dialogue length + y_axis_title_margin: 50 + y_axis_title_position: Left + color_scheme: supersetColors + seriesType: start + only_total: true + opacity: 0.2 + markerSize: 6 + zoomable: true + show_legend: true + legendType: scroll + legendOrientation: top + x_axis_time_format: smart_date + rich_tooltip: true + tooltipSortByMetric: false + tooltipTimeFormat: smart_date + y_axis_format: SMART_NUMBER + logAxis: false + y_axis_bounds: + - null + - null + extra_form_data: {} + dashboards: + - 1 +query_context: '{"datasource":{"id":2,"type":"table"},"force":false,"queries":[{"time_range":"No + filter","granularity":"start_time","filters":[],"extras":{"time_grain_sqla":null,"having":"","where":""},"applied_time_extras":{},"columns":[],"metrics":[{"aggregate":null,"column":null,"datasourceWarning":false,"expressionType":"SQL","hasCustomLabel":false,"label":"AVG(CAST(request_id + AS Int16))","optionName":"metric_8h0zfurdcy_tzo5q0tuczi","sqlExpression":"AVG(CAST(request_id + AS Int16))"},{"aggregate":null,"column":null,"datasourceWarning":false,"expressionType":"SQL","hasCustomLabel":false,"label":"MAX(CAST(request_id + AS Int16))","optionName":"metric_wz5zx1kqqc_u42eslame2","sqlExpression":"MAX(CAST(request_id + AS Int16))"}],"orderby":[[{"aggregate":null,"column":null,"datasourceWarning":false,"expressionType":"SQL","hasCustomLabel":false,"label":"AVG(CAST(request_id + AS 
Int16))","optionName":"metric_8h0zfurdcy_tzo5q0tuczi","sqlExpression":"AVG(CAST(request_id + AS Int16))"},false]],"annotation_layers":[],"row_limit":10000,"series_columns":[],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"is_timeseries":true,"time_offsets":[],"post_processing":[{"operation":"pivot","options":{"index":["__timestamp"],"columns":[],"aggregates":{"AVG(CAST(request_id + AS Int16))":{"operator":"mean"},"MAX(CAST(request_id AS Int16))":{"operator":"mean"}},"drop_missing_columns":false}},{"operation":"flatten"}]}],"form_data":{"datasource":"2__table","viz_type":"echarts_timeseries_step","slice_id":8,"granularity_sqla":"start_time","time_grain_sqla":null,"time_range":"No + filter","metrics":[{"aggregate":null,"column":null,"datasourceWarning":false,"expressionType":"SQL","hasCustomLabel":false,"label":"AVG(CAST(request_id + AS Int16))","optionName":"metric_8h0zfurdcy_tzo5q0tuczi","sqlExpression":"AVG(CAST(request_id + AS Int16))"},{"aggregate":null,"column":null,"datasourceWarning":false,"expressionType":"SQL","hasCustomLabel":false,"label":"MAX(CAST(request_id + AS Int16))","optionName":"metric_wz5zx1kqqc_u42eslame2","sqlExpression":"MAX(CAST(request_id + AS Int16))"}],"groupby":[],"adhoc_filters":[],"order_desc":true,"row_limit":10000,"truncate_metric":true,"show_empty_columns":true,"comparison_type":"values","annotation_layers":[],"forecastPeriods":10,"forecastInterval":0.8,"x_axis_title_margin":15,"y_axis_title":"AVG/MAX + dialogue 
length","y_axis_title_margin":50,"y_axis_title_position":"Left","color_scheme":"supersetColors","seriesType":"start","only_total":true,"opacity":0.2,"markerSize":6,"zoomable":true,"show_legend":true,"legendType":"scroll","legendOrientation":"top","x_axis_time_format":"smart_date","rich_tooltip":true,"tooltipSortByMetric":false,"tooltipTimeFormat":"smart_date","y_axis_format":"SMART_NUMBER","logAxis":false,"y_axis_bounds":[null,null],"extra_form_data":{},"dashboards":[1],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}' +cache_timeout: null +uuid: 276c3aa7-89bc-49ff-91b6-6232ae35c854 +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/charts/Service_load_users_9.yaml b/dff/config/superset_dashboard/charts/Service_load_users_9.yaml new file mode 100644 index 000000000..126af9af3 --- /dev/null +++ b/dff/config/superset_dashboard/charts/Service_load_users_9.yaml @@ -0,0 +1,62 @@ +slice_name: Service load [users] +description: null +certified_by: null +certification_details: null +viz_type: big_number +params: + datasource: 2__table + viz_type: big_number + slice_id: 4 + granularity_sqla: start_time + time_grain_sqla: PT5M + time_range: No filter + metric: + expressionType: SIMPLE + column: + advanced_data_type: null + certification_details: null + certified_by: null + column_name: context_id + description: null + expression: null + filterable: true + groupby: true + id: 12 + is_certified: false + is_dttm: false + python_date_format: null + type: String + type_generic: 1 + verbose_name: null + warning_markdown: null + aggregate: COUNT_DISTINCT + sqlExpression: null + datasourceWarning: false + hasCustomLabel: false + label: COUNT_DISTINCT(context_id) + optionName: metric_5554up52jgr_laajjk3aaim + adhoc_filters: [] + show_timestamp: false + show_trend_line: true + start_y_axis_at_zero: true + color_picker: + a: 1 + b: 135 + g: 122 + r: 0 + header_font_size: 
0.3 + subheader_font_size: 0.15 + y_axis_format: SMART_NUMBER + time_format: smart_date + force_timestamp_formatting: false + rolling_type: None + extra_form_data: {} + dashboards: + - 1 +query_context: '{"datasource":{"id":2,"type":"table"},"force":false,"queries":[{"time_range":"No + filter","granularity":"start_time","filters":[],"extras":{"time_grain_sqla":"PT5M","having":"","where":""},"applied_time_extras":{},"columns":[],"metrics":[{"expressionType":"SIMPLE","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":12,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"String","type_generic":1,"verbose_name":null,"warning_markdown":null},"aggregate":"COUNT_DISTINCT","sqlExpression":null,"datasourceWarning":false,"hasCustomLabel":false,"label":"COUNT_DISTINCT(context_id)","optionName":"metric_5554up52jgr_laajjk3aaim"}],"annotation_layers":[],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"is_timeseries":true,"post_processing":[{"operation":"pivot","options":{"index":["__timestamp"],"columns":[],"aggregates":{"COUNT_DISTINCT(context_id)":{"operator":"mean"}},"drop_missing_columns":true}},{"operation":"flatten"}]}],"form_data":{"datasource":"2__table","viz_type":"big_number","slice_id":4,"granularity_sqla":"start_time","time_grain_sqla":"PT5M","time_range":"No + 
filter","metric":{"expressionType":"SIMPLE","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"context_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":12,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"String","type_generic":1,"verbose_name":null,"warning_markdown":null},"aggregate":"COUNT_DISTINCT","sqlExpression":null,"datasourceWarning":false,"hasCustomLabel":false,"label":"COUNT_DISTINCT(context_id)","optionName":"metric_5554up52jgr_laajjk3aaim"},"adhoc_filters":[],"show_timestamp":false,"show_trend_line":true,"start_y_axis_at_zero":true,"color_picker":{"a":1,"b":135,"g":122,"r":0},"header_font_size":0.3,"subheader_font_size":0.15,"y_axis_format":"SMART_NUMBER","time_format":"smart_date","force_timestamp_formatting":false,"rolling_type":"None","extra_form_data":{},"dashboards":[1],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}' +cache_timeout: null +uuid: b5d43314-514c-464e-9fcc-f897e3ae0963 +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/charts/Table_10.yaml b/dff/config/superset_dashboard/charts/Table_10.yaml new file mode 100644 index 000000000..b3f9f636c --- /dev/null +++ b/dff/config/superset_dashboard/charts/Table_10.yaml @@ -0,0 +1,34 @@ +slice_name: Table +description: null +certified_by: null +certification_details: null +viz_type: table +params: + adhoc_filters: [] + all_columns: [] + color_pn: true + datasource: 3__table + extra_form_data: {} + granularity_sqla: start_time + groupby: + - context_id + - start_time + - data_key + - data + order_by_cols: [] + order_desc: false + percent_metrics: [] + query_mode: aggregate + row_limit: 10000 + server_page_length: 10 + show_cell_bars: true + slice_id: 14 + table_timestamp_format: smart_date + time_grain_sqla: null + time_range: No filter + viz_type: table +query_context: null 
+cache_timeout: null +uuid: 38d353dc-c0ef-41a0-97c7-3dcbebab9e02 +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/charts/Terminal_labels_11.yaml b/dff/config/superset_dashboard/charts/Terminal_labels_11.yaml new file mode 100644 index 000000000..76daa1499 --- /dev/null +++ b/dff/config/superset_dashboard/charts/Terminal_labels_11.yaml @@ -0,0 +1,56 @@ +slice_name: Terminal labels +description: null +certified_by: null +certification_details: null +viz_type: echarts_timeseries_bar +params: + datasource: 1__table + viz_type: echarts_timeseries_bar + slice_id: 11 + granularity_sqla: start_time + time_grain_sqla: null + time_range: No filter + metrics: + - count + groupby: + - flow_label + - node_label + contributionMode: null + adhoc_filters: [] + order_desc: true + row_limit: 10000 + truncate_metric: true + show_empty_columns: true + comparison_type: values + annotation_layers: [] + forecastPeriods: 10 + forecastInterval: 0.8 + orientation: vertical + x_axis_title_margin: 15 + y_axis_title_margin: 15 + y_axis_title_position: Left + color_scheme: supersetColors + show_value: false + stack: false + only_total: true + zoomable: true + show_legend: true + legendType: scroll + legendOrientation: top + x_axis_time_format: smart_date + y_axis_format: SMART_NUMBER + y_axis_bounds: + - null + - null + rich_tooltip: true + tooltipTimeFormat: smart_date + extra_form_data: {} + dashboards: + - 1 +query_context: '{"datasource":{"id":1,"type":"table"},"force":false,"queries":[{"time_range":"No + 
filter","granularity":"start_time","filters":[],"extras":{"time_grain_sqla":null,"having":"","where":""},"applied_time_extras":{},"columns":["flow_label","node_label"],"metrics":["count"],"orderby":[["count",false]],"annotation_layers":[],"row_limit":10000,"series_columns":["flow_label","node_label"],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"is_timeseries":true,"time_offsets":[],"post_processing":[{"operation":"pivot","options":{"index":["__timestamp"],"columns":["flow_label","node_label"],"aggregates":{"count":{"operator":"mean"}},"drop_missing_columns":false}},{"operation":"rename","options":{"columns":{"count":null},"level":0,"inplace":true}},{"operation":"flatten"}]}],"form_data":{"datasource":"1__table","viz_type":"echarts_timeseries_bar","slice_id":11,"granularity_sqla":"start_time","time_grain_sqla":null,"time_range":"No + filter","metrics":["count"],"groupby":["flow_label","node_label"],"contributionMode":null,"adhoc_filters":[],"order_desc":true,"row_limit":10000,"truncate_metric":true,"show_empty_columns":true,"comparison_type":"values","annotation_layers":[],"forecastPeriods":10,"forecastInterval":0.8,"orientation":"vertical","x_axis_title_margin":15,"y_axis_title_margin":15,"y_axis_title_position":"Left","color_scheme":"supersetColors","show_value":false,"stack":false,"only_total":true,"zoomable":true,"show_legend":true,"legendType":"scroll","legendOrientation":"top","x_axis_time_format":"smart_date","y_axis_format":"SMART_NUMBER","y_axis_bounds":[null,null],"rich_tooltip":true,"tooltipTimeFormat":"smart_date","extra_form_data":{},"dashboards":[1],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}' +cache_timeout: null +uuid: cf066e41-a9e8-4f54-a875-ebf4da350b59 +version: 1.0.0 +dataset_uuid: d7f6546e-1e3a-479d-8531-05b5e73e5c05 diff --git a/dff/config/superset_dashboard/charts/Transition_counts_12.yaml 
b/dff/config/superset_dashboard/charts/Transition_counts_12.yaml new file mode 100644 index 000000000..f44c72191 --- /dev/null +++ b/dff/config/superset_dashboard/charts/Transition_counts_12.yaml @@ -0,0 +1,63 @@ +slice_name: Transition counts +description: null +certified_by: null +certification_details: null +viz_type: dist_bar +params: + adhoc_filters: [] + bar_stacked: true + bottom_margin: auto + color_scheme: supersetColors + columns: + - flow_label + datasource: 1__table + extra_form_data: {} + granularity_sqla: start_time + groupby: + - prev_label + - label + metrics: + - aggregate: COUNT_DISTINCT + column: + advanced_data_type: null + certification_details: null + certified_by: null + column_name: context_id + description: null + expression: null + filterable: true + groupby: true + id: 1 + is_certified: false + is_dttm: false + python_date_format: null + type: STRING + type_generic: 1 + verbose_name: null + warning_markdown: null + expressionType: SIMPLE + hasCustomLabel: false + isNew: false + label: COUNT_DISTINCT(context_id) + optionName: metric_tc3lb0a1pff_dyroibp08h7 + sqlExpression: null + order_desc: true + rich_tooltip: true + row_limit: 10000 + show_legend: true + slice_id: 6 + time_range: No filter + viz_type: dist_bar + x_axis_label: Transitions + x_ticks_layout: auto + y_axis_bounds: + - null + - null + y_axis_format: SMART_NUMBER + y_axis_label: Counts + y_axis_showminmax: false +query_context: null +cache_timeout: null +uuid: 9fcc7cc1-9257-4c0d-b377-b3a60a8bf3df +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/charts/Transition_layout_13.yaml b/dff/config/superset_dashboard/charts/Transition_layout_13.yaml new file mode 100644 index 000000000..5999b30ed --- /dev/null +++ b/dff/config/superset_dashboard/charts/Transition_layout_13.yaml @@ -0,0 +1,62 @@ +slice_name: Transition layout +description: null +certified_by: null +certification_details: null +viz_type: graph_chart +params: 
+ adhoc_filters: [] + baseEdgeWidth: 3 + baseNodeSize: 20 + color_scheme: supersetColors + datasource: 1__table + draggable: false + edgeLength: 400 + edgeSymbol: none,arrow + extra_form_data: {} + friction: 0.2 + granularity_sqla: start_time + gravity: 0.3 + layout: force + legendOrientation: top + legendType: scroll + metric: + aggregate: COUNT + column: + advanced_data_type: null + certification_details: null + certified_by: null + column_name: context_id + description: null + expression: null + filterable: true + groupby: true + id: 1 + is_certified: false + is_dttm: false + python_date_format: null + type: STRING + type_generic: 1 + verbose_name: null + warning_markdown: null + expressionType: SIMPLE + hasCustomLabel: false + isNew: false + label: COUNT(context_id) + optionName: metric_qxsyaujh63_4b75kyx6b5g + sqlExpression: null + repulsion: 1000 + roam: scale + row_limit: 10000 + selectedMode: single + show_legend: true + source: prev_label + source_category: flow_label + target: label + target_category: flow_label + time_range: No filter + viz_type: graph_chart +query_context: null +cache_timeout: null +uuid: 801e8c66-f693-46e3-a9a5-7b2b0b08a4a7 +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/charts/Transition_ratio_chord_14.yaml b/dff/config/superset_dashboard/charts/Transition_ratio_chord_14.yaml new file mode 100644 index 000000000..e9d1f8723 --- /dev/null +++ b/dff/config/superset_dashboard/charts/Transition_ratio_chord_14.yaml @@ -0,0 +1,48 @@ +slice_name: Transition ratio [chord] +description: null +certified_by: null +certification_details: null +viz_type: chord +params: + adhoc_filters: [] + color_scheme: bnbColors + columns: prev_label + datasource: 1__table + extra_form_data: {} + granularity_sqla: start_time + groupby: label + metric: + aggregate: COUNT_DISTINCT + column: + advanced_data_type: null + certification_details: null + certified_by: null + column_name: context_id + 
description: null + expression: null + filterable: true + groupby: true + id: 1 + is_certified: false + is_dttm: false + python_date_format: null + type: STRING + type_generic: 1 + verbose_name: null + warning_markdown: null + expressionType: SIMPLE + hasCustomLabel: false + isNew: false + label: COUNT_DISTINCT(context_id) + optionName: metric_97lz5hqft8j_aqofzpt1ma5 + sqlExpression: null + row_limit: 10000 + slice_id: 13 + time_range: No filter + viz_type: chord + y_axis_format: ~g +query_context: null +cache_timeout: null +uuid: 388d2359-8d13-4795-8dc9-1cb5dfa92ee1 +version: 1.0.0 +dataset_uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 diff --git a/dff/config/superset_dashboard/dashboards/DFF_Stats_1.yaml b/dff/config/superset_dashboard/dashboards/DFF_Stats_1.yaml new file mode 100644 index 000000000..0523b8ae1 --- /dev/null +++ b/dff/config/superset_dashboard/dashboards/DFF_Stats_1.yaml @@ -0,0 +1,555 @@ +dashboard_title: DFF Stats +description: null +css: '' +slug: dff-stats +uuid: 68bce374-99bc-4890-b8c2-cb172409b894 +position: + CHART-Af3zvLsiKV: + children: [] + id: CHART-Af3zvLsiKV + meta: + chartId: 4 + height: 66 + sliceName: Node visit ratio monitor + uuid: 6fafe59c-0fec-4cd8-a8b3-c0bfaffb2135 + width: 12 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-6zE8noCIsx + - ROW-7g6_n72hZ + type: CHART + CHART-CuCYDOlGEu: + children: [] + id: CHART-CuCYDOlGEu + meta: + chartId: 10 + height: 50 + sliceName: Table + sliceNameOverride: Stats table + uuid: 38d353dc-c0ef-41a0-97c7-3dcbebab9e02 + width: 12 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-JCU6rANFP + - ROW-EaYAQjv4W + type: CHART + CHART-DxY0c3RnIv: + children: [] + id: CHART-DxY0c3RnIv + meta: + chartId: 3 + height: 61 + sliceName: Node counts + uuid: 0c47c7b5-f500-46cb-97e3-9ebb637f0c8a + width: 12 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-VUknWnOAy + - ROW-PvToekFjO + type: CHART + CHART-explore-10-1: + children: [] + id: CHART-explore-10-1 + meta: + 
chartId: 13 + height: 73 + sliceName: Transition layout + sliceNameOverride: Dialogue layout + uuid: 801e8c66-f693-46e3-a9a5-7b2b0b08a4a7 + width: 6 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-JCU6rANFP + - ROW-Ccs1FbcI- + type: CHART + CHART-explore-11-1: + children: [] + id: CHART-explore-11-1 + meta: + chartId: 5 + height: 56 + sliceName: Node visits [cloud] + uuid: b25b4292-ff21-4164-98ac-b1cba95e2994 + width: 8 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-Gw0Ffh1lG + - ROW-rR4J0eAhh + type: CHART + CHART-explore-13-1: + children: [] + id: CHART-explore-13-1 + meta: + chartId: 14 + height: 74 + sliceName: Transition ratio [chord] + uuid: 388d2359-8d13-4795-8dc9-1cb5dfa92ee1 + width: 6 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-JCU6rANFP + - ROW-Ccs1FbcI- + type: CHART + CHART-explore-14-1: + children: [] + id: CHART-explore-14-1 + meta: + chartId: 7 + height: 105 + sliceName: Node visits [sunburst] + uuid: e955f48c-824c-423c-a11c-5b5dca162927 + width: 6 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-Gw0Ffh1lG + - ROW-ZcN9G9RL5 + type: CHART + CHART-explore-20-1: + children: [] + id: CHART-explore-20-1 + meta: + chartId: 2 + height: 50 + sliceName: Node Visits + uuid: 44f4ab9d-5072-4926-a6ed-8615fb81b3d0 + width: 12 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-6zE8noCIsx + - ROW-ksepbeQu6 + type: CHART + CHART-explore-21-1: + children: [] + id: CHART-explore-21-1 + meta: + chartId: 1 + height: 60 + sliceName: Flow visit ratio monitor + uuid: ba02528b-184b-4304-b027-f2b7d9011ab0 + width: 12 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-6zE8noCIsx + - ROW-ZTVWOu2o0 + type: CHART + CHART-explore-22-1: + children: [] + id: CHART-explore-22-1 + meta: + chartId: 11 + height: 61 + sliceName: Terminal labels + sliceNameOverride: Terminal labels monitor + uuid: cf066e41-a9e8-4f54-a875-ebf4da350b59 + width: 12 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + 
- TAB-6zE8noCIsx + - ROW-LhKWfqM7V + type: CHART + CHART-explore-9-1: + children: [] + id: CHART-explore-9-1 + meta: + chartId: 6 + height: 105 + sliceName: Node visits [ratio] + uuid: f9fb7893-3533-4519-bbc4-1f4853f380e1 + width: 6 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-Gw0Ffh1lG + - ROW-ZcN9G9RL5 + type: CHART + CHART-mYC2udeF3a: + children: [] + id: CHART-mYC2udeF3a + meta: + chartId: 9 + height: 50 + sliceName: Service load [users] + uuid: b5d43314-514c-464e-9fcc-f897e3ae0963 + width: 12 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-6zE8noCIsx + - ROW-Ae7v1prsp2 + type: CHART + CHART-wci7CHiza6: + children: [] + id: CHART-wci7CHiza6 + meta: + chartId: 12 + height: 66 + sliceName: Transition counts + uuid: 9fcc7cc1-9257-4c0d-b377-b3a60a8bf3df + width: 12 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-VUknWnOAy + - ROW-TRNUnY9X_Y + type: CHART + COLUMN-JRaDi96UVP: + children: [] + id: COLUMN-JRaDi96UVP + meta: + background: BACKGROUND_TRANSPARENT + width: 2 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-Gw0Ffh1lG + - ROW-rR4J0eAhh + type: COLUMN + COLUMN-WysgPuf0P1: + children: [] + id: COLUMN-WysgPuf0P1 + meta: + background: BACKGROUND_TRANSPARENT + width: 2 + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-Gw0Ffh1lG + - ROW-rR4J0eAhh + type: COLUMN + DASHBOARD_VERSION_KEY: v2 + GRID_ID: + children: + - HEADER-nYVwogYInk + - TABS-Xoi5oUBxZI + id: GRID_ID + parents: + - ROOT_ID + type: GRID + HEADER-nYVwogYInk: + children: [] + id: HEADER-nYVwogYInk + meta: + background: BACKGROUND_TRANSPARENT + headerSize: LARGE_HEADER + text: DFF Stats + parents: + - ROOT_ID + - GRID_ID + type: HEADER + HEADER_ID: + id: HEADER_ID + meta: + text: DFF Stats + type: HEADER + ROOT_ID: + children: + - GRID_ID + id: ROOT_ID + type: ROOT + ROW-7g6_n72hZ: + children: + - CHART-Af3zvLsiKV + id: ROW-7g6_n72hZ + meta: + background: BACKGROUND_TRANSPARENT + parents: + - ROOT_ID + - GRID_ID + - 
TABS-Xoi5oUBxZI + - TAB-6zE8noCIsx + type: ROW + ROW-Ae7v1prsp2: + children: + - CHART-mYC2udeF3a + id: ROW-Ae7v1prsp2 + meta: + background: BACKGROUND_TRANSPARENT + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-6zE8noCIsx + type: ROW + ROW-Ccs1FbcI-: + children: + - CHART-explore-10-1 + - CHART-explore-13-1 + id: ROW-Ccs1FbcI- + meta: + background: BACKGROUND_TRANSPARENT + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-JCU6rANFP + type: ROW + ROW-EaYAQjv4W: + children: + - CHART-CuCYDOlGEu + id: ROW-EaYAQjv4W + meta: + background: BACKGROUND_TRANSPARENT + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-JCU6rANFP + type: ROW + ROW-LhKWfqM7V: + children: + - CHART-explore-22-1 + id: ROW-LhKWfqM7V + meta: + background: BACKGROUND_TRANSPARENT + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-6zE8noCIsx + type: ROW + ROW-PvToekFjO: + children: + - CHART-DxY0c3RnIv + id: ROW-PvToekFjO + meta: + background: BACKGROUND_TRANSPARENT + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-VUknWnOAy + type: ROW + ROW-TRNUnY9X_Y: + children: + - CHART-wci7CHiza6 + id: ROW-TRNUnY9X_Y + meta: + background: BACKGROUND_TRANSPARENT + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-VUknWnOAy + type: ROW + ROW-ZTVWOu2o0: + children: + - CHART-explore-21-1 + id: ROW-ZTVWOu2o0 + meta: + background: BACKGROUND_TRANSPARENT + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-6zE8noCIsx + type: ROW + ROW-ZcN9G9RL5: + children: + - CHART-explore-9-1 + - CHART-explore-14-1 + id: ROW-ZcN9G9RL5 + meta: + background: BACKGROUND_TRANSPARENT + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-Gw0Ffh1lG + type: ROW + ROW-hfJk2ddHi: + children: + - CHART-explore-20-1 + id: ROW-hfJk2ddHi + meta: + background: BACKGROUND_TRANSPARENT + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-VUknWnOAy + type: ROW + ROW-ksepbeQu6: + children: + - CHART-explore-20-1 + id: ROW-ksepbeQu6 + meta: + 
background: BACKGROUND_TRANSPARENT + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-6zE8noCIsx + type: ROW + ROW-rR4J0eAhh: + children: + - COLUMN-JRaDi96UVP + - CHART-explore-11-1 + - COLUMN-WysgPuf0P1 + id: ROW-rR4J0eAhh + meta: + background: BACKGROUND_TRANSPARENT + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + - TAB-Gw0Ffh1lG + type: ROW + TAB-6zE8noCIsx: + children: + - ROW-Ae7v1prsp2 + - ROW-ksepbeQu6 + - ROW-ZTVWOu2o0 + - ROW-7g6_n72hZ + - ROW-LhKWfqM7V + id: TAB-6zE8noCIsx + meta: + defaultText: Tab title + placeholder: Tab title + text: Service stats + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + type: TAB + TAB-Gw0Ffh1lG: + children: + - ROW-ZcN9G9RL5 + - ROW-rR4J0eAhh + id: TAB-Gw0Ffh1lG + meta: + defaultText: Tab title + placeholder: Tab title + text: General stats + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + type: TAB + TAB-JCU6rANFP: + children: + - ROW-EaYAQjv4W + - ROW-Ccs1FbcI- + id: TAB-JCU6rANFP + meta: + defaultText: Tab title + placeholder: Tab title + text: Overview + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + type: TAB + TAB-VUknWnOAy: + children: + - ROW-PvToekFjO + - ROW-hfJk2ddHi + - ROW-TRNUnY9X_Y + id: TAB-VUknWnOAy + meta: + defaultText: Tab title + placeholder: Tab title + text: Additional stats + parents: + - ROOT_ID + - GRID_ID + - TABS-Xoi5oUBxZI + type: TAB + TABS-Xoi5oUBxZI: + children: + - TAB-JCU6rANFP + - TAB-Gw0Ffh1lG + - TAB-VUknWnOAy + - TAB-6zE8noCIsx + id: TABS-Xoi5oUBxZI + meta: {} + parents: + - ROOT_ID + - GRID_ID + type: TABS +metadata: + color_scheme: echarts4Colors + label_colors: {} + shared_label_colors: + 'greeting_flow: node1': '#c23531' + 'greeting_flow: node2': '#2f4554' + 'greeting_flow: node3': '#61a0a8' + 'greeting_flow: node4': '#d48265' + greeting_flow, node1: '#c23531' + greeting_flow, node2: '#2f4554' + greeting_flow, node3: '#61a0a8' + greeting_flow, node4: '#d48265' + greeting_flow: '#c23531' + AVG(CAST(request_id AS Int16)): '#c23531' + 
MAX(CAST(request_id AS Int16)): '#2f4554' + root: '#c23531' + node2: '#61a0a8' + node4: '#d48265' + node3: '#91c7ae' + node1: '#749f83' + ? '' + : '#c23531' + timed_refresh_immune_slices: [] + expanded_slices: {} + refresh_frequency: 1800 + show_native_filters: true + default_filters: '{}' + chart_configuration: {} + color_scheme_domain: + - '#c23531' + - '#2f4554' + - '#61a0a8' + - '#d48265' + - '#91c7ae' + - '#749f83' + - '#ca8622' + - '#bda29a' + - '#6e7074' + - '#546570' + - '#c4ccd3' + cross_filters_enabled: false + native_filter_configuration: + - id: NATIVE_FILTER-5vyW3SgU5 + controlValues: + enableEmptyFilter: false + name: Time grain + filterType: filter_timegrain + targets: + - datasetUuid: fda98ab8-f550-45f1-9ded-0113f3e67260 + defaultDataMask: + extraFormData: {} + filterState: {} + ownState: {} + cascadeParentIds: [] + scope: + rootPath: + - TAB-6zE8noCIsx + excluded: + - 7 + type: NATIVE_FILTER + description: '' +version: 1.0.0 diff --git a/dff/config/superset_dashboard/databases/dff_database.yaml b/dff/config/superset_dashboard/databases/dff_database.yaml new file mode 100644 index 000000000..b178a0f80 --- /dev/null +++ b/dff/config/superset_dashboard/databases/dff_database.yaml @@ -0,0 +1,13 @@ +database_name: dff_database +sqlalchemy_uri: clickhousedb+connect://username:XXXXXXXXXX@clickhouse:8123/test +cache_timeout: null +expose_in_sqllab: true +allow_run_async: false +allow_ctas: false +allow_cvas: false +allow_dml: false +allow_file_upload: false +extra: + allows_virtual_table_explore: true +uuid: ee32f76e-55e3-483a-935a-22d03072db23 +version: 1.0.0 diff --git a/dff/config/superset_dashboard/datasets/dff_database/dff_final_nodes.yaml b/dff/config/superset_dashboard/datasets/dff_database/dff_final_nodes.yaml new file mode 100644 index 000000000..a07fdd800 --- /dev/null +++ b/dff/config/superset_dashboard/datasets/dff_database/dff_final_nodes.yaml @@ -0,0 +1,105 @@ +table_name: dff_final_nodes +main_dttm_col: null +description: null 
+default_endpoint: null +offset: 0 +cache_timeout: null +schema: test +sql: "\nWITH main AS (\n SELECT LogAttributes['context_id'] AS context_id,\n \ + \ max(LogAttributes['request_id']) AS max_history\n FROM otel_logs\nGROUP BY\ + \ context_id\n)\nSELECT DISTINCT LogAttributes['context_id'] AS context_id,\nLogAttributes['request_id']\ + \ AS request_id,\notel_logs.Timestamp AS start_time,\nJSON_VALUE(otel_logs.Body,\ + \ '$.label') AS label,\nJSON_VALUE(otel_logs.Body, '$.flow') AS flow_label,\nJSON_VALUE(otel_logs.Body,\ + \ '$.node') AS node_label\nFROM otel_logs\nINNER JOIN main\nON context_id = main.context_id\n\ + AND request_id = main.max_history\nINNER JOIN otel_traces\nON otel_logs.TraceId\ + \ = otel_traces.TraceId\nWHERE otel_traces.SpanName = 'get_current_label'\n" +params: null +template_params: null +filter_select_enabled: false +fetch_values_predicate: null +extra: null +uuid: d7f6546e-1e3a-479d-8531-05b5e73e5c05 +metrics: +- metric_name: count + verbose_name: null + metric_type: null + expression: count(*) + description: null + d3format: null + extra: {} + warning_text: null +columns: +- column_name: start_time + verbose_name: null + is_dttm: true + is_active: true + type: DateTime64(9) + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +- column_name: node_label + verbose_name: null + is_dttm: false + is_active: true + type: String + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +- column_name: flow_label + verbose_name: null + is_dttm: false + is_active: true + type: String + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +- column_name: context_id + verbose_name: null + is_dttm: false + is_active: true + type: String + advanced_data_type: null + groupby: true + filterable: true + 
expression: null + description: null + python_date_format: null + extra: {} +- column_name: request_id + verbose_name: null + is_dttm: false + is_active: true + type: String + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +- column_name: label + verbose_name: null + is_dttm: false + is_active: true + type: String + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +version: 1.0.0 +database_uuid: ee32f76e-55e3-483a-935a-22d03072db23 diff --git a/dff/config/superset_dashboard/datasets/dff_database/dff_node_stats.yaml b/dff/config/superset_dashboard/datasets/dff_database/dff_node_stats.yaml new file mode 100644 index 000000000..5ddab7c69 --- /dev/null +++ b/dff/config/superset_dashboard/datasets/dff_database/dff_node_stats.yaml @@ -0,0 +1,144 @@ +table_name: dff_node_stats +main_dttm_col: null +description: null +default_endpoint: null +offset: 0 +cache_timeout: null +schema: test +sql: "\nWITH main AS (\n SELECT DISTINCT otel_logs.LogAttributes['context_id']\ + \ as context_id,\n otel_logs.LogAttributes['request_id'] as request_id,\n \ + \ otel_traces.Timestamp as start_time,\n otel_traces.SpanName as data_key,\n\ + \ otel_logs.Body as data,\n JSON_VALUE(otel_logs.Body, '$.label') as label,\n\ + \ JSON_VALUE(otel_logs.Body, '$.flow') as flow_label,\n JSON_VALUE(otel_logs.Body,\ + \ '$.node') as node_label,\n otel_logs.TraceId as trace_id,\n otel_traces.TraceId\n\ + FROM otel_logs, otel_traces\n WHERE otel_logs.TraceId = otel_traces.TraceId and\ + \ otel_traces.SpanName = 'get_current_label'\n ORDER BY context_id, request_id\n\ + ) SELECT context_id,\n request_id,\n start_time,\n data_key,\n data,\n\ + \ label,\n neighbor(label, -1) as prev_label,\n flow_label,\n node_label\n\ + FROM main\nWHERE label != ''\n" +params: null +template_params: null +filter_select_enabled: false 
+fetch_values_predicate: null +extra: null +uuid: fda98ab8-f550-45f1-9ded-0113f3e67260 +metrics: +- metric_name: count + verbose_name: null + metric_type: null + expression: count(*) + description: null + d3format: null + extra: {} + warning_text: null +columns: +- column_name: data_key + verbose_name: null + is_dttm: false + is_active: true + type: LowCardinality(String) + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +- column_name: start_time + verbose_name: null + is_dttm: true + is_active: true + type: DateTime + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +- column_name: node_label + verbose_name: null + is_dttm: false + is_active: true + type: String + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +- column_name: prev_label + verbose_name: null + is_dttm: false + is_active: true + type: String + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +- column_name: flow_label + verbose_name: null + is_dttm: false + is_active: true + type: String + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +- column_name: context_id + verbose_name: null + is_dttm: false + is_active: true + type: String + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +- column_name: request_id + verbose_name: null + is_dttm: false + is_active: true + type: String + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +- column_name: data + verbose_name: null + is_dttm: 
false + is_active: true + type: String + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +- column_name: label + verbose_name: null + is_dttm: false + is_active: true + type: String + advanced_data_type: null + groupby: true + filterable: true + expression: null + description: null + python_date_format: null + extra: {} +version: 1.0.0 +database_uuid: ee32f76e-55e3-483a-935a-22d03072db23 diff --git a/dff/config/superset_dashboard/metadata.yaml b/dff/config/superset_dashboard/metadata.yaml new file mode 100644 index 000000000..b68dadba1 --- /dev/null +++ b/dff/config/superset_dashboard/metadata.yaml @@ -0,0 +1,3 @@ +version: 1.0.0 +type: Dashboard +timestamp: '2023-07-25T14:27:32.324677+00:00' diff --git a/dff/pipeline/types.py b/dff/pipeline/types.py index 0a1e679fe..39584a303 100644 --- a/dff/pipeline/types.py +++ b/dff/pipeline/types.py @@ -110,6 +110,13 @@ class ExtraHandlerType(str, Enum): class ServiceRuntimeInfo(BaseModel): + """ + Type of object, that is passed to components in runtime. + Contains current component info (`name`, `path`, `timeout`, `asynchronous`). + Also contains `execution_state` - a dictionary, + containing execution states of other components mapped to their paths. + """ + name: str path: str timeout: Optional[float] @@ -117,14 +124,6 @@ class ServiceRuntimeInfo(BaseModel): execution_state: Dict[str, ComponentExecutionState] -""" -Type of object, that is passed to components in runtime. -Contains current component info (`name`, `path`, `timeout`, `asynchronous`). -Also contains `execution_state` - a dictionary, -containing execution states of other components mapped to their paths. 
-""" - - ExtraHandlerFunction: TypeAlias = Union[ Callable[[Context], Any], Callable[[Context, _ForwardPipeline], Any], diff --git a/dff/stats/__init__.py b/dff/stats/__init__.py index 973e6e2d5..058cd7fbd 100644 --- a/dff/stats/__init__.py +++ b/dff/stats/__init__.py @@ -1,2 +1,10 @@ # -*- coding: utf-8 -*- # flake8: noqa: F401 + +from . import exporter_patch +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.sdk.trace.export import ConsoleSpanExporter +from opentelemetry.sdk._logs.export import InMemoryLogExporter, ConsoleLogExporter +from opentelemetry.sdk.metrics.export import InMemoryMetricReader, ConsoleMetricExporter +from .utils import get_wrapper_field, set_logger_destination, set_tracer_destination +from .instrumentor import OtelInstrumentor, OTLPMetricExporter, OTLPLogExporter, OTLPSpanExporter diff --git a/dff/stats/__main__.py b/dff/stats/__main__.py new file mode 100644 index 000000000..396273ea2 --- /dev/null +++ b/dff/stats/__main__.py @@ -0,0 +1,111 @@ +""" +Main +---- +This module includes command line scripts for Superset dashboard configuration, +e.g. for creating and importing configuration archives. +In a configuration archive, you can define such settings as passwords, networking addressses etc. +using your own parameters that can be passed as a config file and overridden by command line arguments. + +Examples +******** + +.. code:: bash + + # Create and import a configuration archive. + # The import overrides existing dashboard configurations. 
+ dff.stats config.yaml \\ + -U superset_user \\ + -P superset_password \\ + -dP database_password \\ + --db.user=database_user \\ + --db.host=clickhouse \\ + --db.port=8123 \\ + --db.name=test \\ + --db.table=otel_logs \\ + --outfile=config_artifact.zip + +""" +import sys +import argparse +from typing import Optional + +from .cli import import_dashboard, make_zip_config +from .utils import PasswordAction + + +def main(parsed_args: Optional[argparse.Namespace] = None): + """ + Function that evokes procedures defined in `cli` module. + + :param parsed_args: Set of command line arguments. If passed, overrides the command line contents. + See the module docs for reference. + """ + parser = argparse.ArgumentParser( + usage="""Creates a configuration archive and uploads it to the Superset server. + The import overrides existing dashboard configurations if present. + The function accepts a yaml file; also, all of the options can also be overridden + via the command line. Setting passwords interactively is supported. 
+ + dff.stats config.yaml \\ + -U superset_user \\ + -P superset_password \\ + -dP database_password \\ + --db.user=database_user \\ + --db.host=clickhouse \\ + --db.port=8123 \\ + --db.name=test \\ + --db.table=otel_logs \\ + --outfile=config_artifact.zip + + Use the `--help` flag to get more information.""" + ) + parser.add_argument("file", type=str) + parser.add_argument( + "-dD", + "--db.driver", + choices=["clickhousedb+connect"], + help="DBMS driver.", + default="clickhousedb+connect", + ) + parser.add_argument("-dU", "--db.user", help="Database user.") + parser.add_argument("-dh", "--db.host", default="clickhouse", help="Database host.") + parser.add_argument("-dp", "--db.port", help="Database port.") + parser.add_argument("-dn", "--db.name", help="Name of the database.") + parser.add_argument("-dt", "--db.table", default="otel_logs", help="Name of the table.") + parser.add_argument("-o", "--outfile", help="Optionally persist the configuration as a zip file.") + parser.add_argument("-H", "--host", default="localhost", help="Superset host") + parser.add_argument("-p", "--port", default="8088", help="Superset port.") + parser.add_argument("-U", "--username", required=True, help="Superset user.") + parser.add_argument( + "-P", + "--password", + dest="password", + type=str, + action=PasswordAction, + help="Superset password.", + nargs="?", + required=True, + ) + parser.add_argument( + "-dP", + "--db.password", + dest="db.password", + type=str, + action=PasswordAction, + help="Database password.", + required=True, + nargs="?", + ) + + if parsed_args is None: + parsed_args = parser.parse_args(sys.argv[1:]) + + outfile = make_zip_config(parsed_args) + import_dashboard(parsed_args, zip_file=str(outfile)) + + if not hasattr(parsed_args, "outfile") or parsed_args.outfile is None: + outfile.unlink() + + +if __name__ == "__main__": + main() diff --git a/dff/stats/cli.py b/dff/stats/cli.py new file mode 100644 index 000000000..20058a990 --- /dev/null +++ 
b/dff/stats/cli.py @@ -0,0 +1,252 @@ +""" +Command Line Interface +---------------------- +This modules defines commands that can be called via the command line interface. + +""" +import tempfile +import shutil +import sys +import argparse +import os +import logging +from urllib import parse +from pathlib import Path +from typing import Optional + +try: + from omegaconf import OmegaConf + from .utils import get_superset_session, drop_superset_assets +except ImportError: + raise ImportError("Some packages are not found. Run `pip install dff[stats]`") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +DFF_DIR = Path(__file__).absolute().parent.parent +""" +Root directory of the local `dff` installation. + +:meta hide-value: +""" +DASHBOARD_DIR = str(DFF_DIR / "config" / "superset_dashboard") +""" +Local path to superset dashboard files to import. + +:meta hide-value: +""" +DASHBOARD_SLUG = "dff-stats" +""" +This variable stores a slug used for building the http address of the DFF dashboard. +""" +DEFAULT_SUPERSET_URL = parse.urlunsplit(("http", "localhost:8088", "/", "", "")) +""" +Default location of the Superset dashboard. +""" + +TYPE_MAPPING_CH = { + "FLOAT": "Nullable(Float64)", + "STRING": "Nullable(String)", + "LONGINTEGER": "Nullable(Int64)", + "INTEGER": "Nullable(Int64)", + "DATETIME": "Nullable(DateTime)", +} +""" +Mapping of standard sql column types to Clickhouse native types. 
+ +:meta hide-value: +""" + +DFF_NODE_STATS_STATEMENT = """ +WITH main AS ( + SELECT DISTINCT {table}.LogAttributes['context_id'] as context_id, + {table}.LogAttributes['request_id'] as request_id, + toDateTime(otel_traces.Timestamp) as start_time, + otel_traces.SpanName as data_key, + {table}.Body as data, + {lblfield} as label, + {flowfield} as flow_label, + {nodefield} as node_label, + {table}.TraceId as trace_id, + otel_traces.TraceId\nFROM {table}, otel_traces + WHERE {table}.TraceId = otel_traces.TraceId and otel_traces.SpanName = 'get_current_label' + ORDER BY context_id, request_id +) SELECT context_id, + request_id, + start_time, + data_key, + data, + label, + {lag} as prev_label, + flow_label, + node_label +FROM main +WHERE label != '' +""" +DFF_ACYCLIC_NODES_STATEMENT = """ +WITH main AS ( + SELECT DISTINCT {table}.LogAttributes['context_id'] as context_id, + {table}.LogAttributes['request_id'] as request_id, + {table}.Timestamp as timestamp, + {lblfield} as label\nFROM {table} + INNER JOIN +( + WITH helper AS ( + SELECT DISTINCT {table}.LogAttributes['context_id'] as context_id, + {table}.LogAttributes['request_id'] as request_id, + {lblfield} as label + FROM {table} + ) + SELECT context_id FROM helper + GROUP BY context_id + HAVING COUNT(context_id) = COUNT(DISTINCT label) +) as plain_ctx +ON plain_ctx.context_id = context_id +ORDER by context_id, request_id +) +SELECT * FROM main +""" +DFF_FINAL_NODES_STATEMENT = """ +WITH main AS ( + SELECT LogAttributes['context_id'] AS context_id, + max(LogAttributes['request_id']) AS max_history + FROM {table}\nGROUP BY context_id +) +SELECT DISTINCT LogAttributes['context_id'] AS context_id, +LogAttributes['request_id'] AS request_id, +{table}.Timestamp AS start_time, +{lblfield} AS label, +{flowfield} AS flow_label, +{nodefield} AS node_label +FROM {table} +INNER JOIN main +ON context_id = main.context_id +AND request_id = main.max_history +INNER JOIN otel_traces +ON {table}.TraceId = otel_traces.TraceId +WHERE 
SQL_STATEMENT_MAPPING = {
    "dff_acyclic_nodes.yaml": DFF_ACYCLIC_NODES_STATEMENT,
    "dff_node_stats.yaml": DFF_NODE_STATS_STATEMENT,
    "dff_final_nodes.yaml": DFF_FINAL_NODES_STATEMENT,
}
"""
Select statements for dashboard configuration with names and types represented as placeholders.
The placeholder system makes queries database agnostic, required values are set during the import phase.

:meta hide-value:
"""


def import_dashboard(parsed_args: Optional[argparse.Namespace] = None, zip_file: Optional[str] = None):
    """
    Import an Apache Superset dashboard to a local instance with specified arguments.
    Before using the command, make sure you have your Superset instance
    up and running: `ghcr.io/deeppavlov/superset_df_dashboard:latest`.
    The import will override existing dashboard configurations if present.

    :param parsed_args: Command line arguments produced by `argparse`.
    :param zip_file: Zip archived dashboard config.
    """
    import json  # local import: `json` is not needed anywhere else in this module

    host = getattr(parsed_args, "host", "localhost")
    port = getattr(parsed_args, "port", "8088")
    superset_url = parse.urlunsplit(("http", f"{host}:{port}", "/", "", ""))
    zip_filename = os.path.basename(zip_file)
    db_password = getattr(parsed_args, "db.password")

    session, headers = get_superset_session(parsed_args, superset_url)
    # Remove pre-existing assets so that the import starts from a clean slate.
    drop_superset_assets(session, headers, superset_url)
    import_dashboard_url = parse.urljoin(superset_url, "/api/v1/dashboard/import/")
    # upload files
    with open(zip_file, "rb") as f:
        response = session.request(
            "POST",
            import_dashboard_url,
            headers=headers,
            data={
                # json.dumps properly escapes quotes/backslashes in the password;
                # the previous hand-concatenated JSON string broke on such characters.
                "passwords": json.dumps({"databases/dff_database.yaml": db_password}),
                "overwrite": "true",
            },
            files=[("formData", (zip_filename, f, "application/zip"))],
        )
        response.raise_for_status()
        logger.info(f"Upload finished with status {response.status_code}.")
+ """ + if hasattr(parsed_args, "outfile") and parsed_args.outfile: + outfile_name = parsed_args.outfile + else: + outfile_name = "temp.zip" + + file_conf = OmegaConf.load(parsed_args.file) + sys.argv = [__file__] + [f"{key}={value}" for key, value in parsed_args.__dict__.items() if value] + cmd_conf = OmegaConf.from_cli() + cli_conf = OmegaConf.merge(file_conf, cmd_conf) + + if OmegaConf.select(cli_conf, "db.driver") == "clickhousedb+connect": + params = dict( + table="${db.table}", + lag="neighbor(label, -1)", + texttype="String", + lblfield="JSON_VALUE(${db.table}.Body, '$.label')", + flowfield="JSON_VALUE(${db.table}.Body, '$.flow')", + nodefield="JSON_VALUE(${db.table}.Body, '$.node')", + ) + else: + raise ValueError("The only supported database driver is 'clickhousedb+connect'.") + + conf = SQL_STATEMENT_MAPPING.copy() + for key in conf.keys(): + conf[key] = {} + conf[key]["sql"] = SQL_STATEMENT_MAPPING[key].format(**params) + + resolve_conf = OmegaConf.create( + { + "database": { + "sqlalchemy_uri": "${db.driver}://${db.user}:XXXXXXXXXX@${db.host}:${db.port}/${db.name}", + }, + **conf, + } + ) + + user_config = OmegaConf.merge(cli_conf, resolve_conf) + OmegaConf.resolve(user_config) + + with tempfile.TemporaryDirectory() as temp_config_dir: + nested_temp_dir = os.path.join(temp_config_dir, "superset_dashboard") + logger.info(f"Copying config files to temporary directory: {nested_temp_dir}.") + + shutil.copytree(DASHBOARD_DIR, nested_temp_dir) + database_dir = Path(os.path.join(nested_temp_dir, "databases")) + dataset_dir = Path(os.path.join(nested_temp_dir, "datasets/dff_database")) + + logger.info("Overriding the initial configuration.") + # overwrite sqlalchemy uri + for filepath in database_dir.iterdir(): + file_config = OmegaConf.load(filepath) + new_file_config = OmegaConf.merge(file_config, OmegaConf.select(user_config, "database")) + OmegaConf.save(new_file_config, filepath) + + # overwrite sql expressions and column types + for filepath in 
dataset_dir.iterdir(): + file_config = OmegaConf.load(filepath) + new_file_config = OmegaConf.merge(file_config, getattr(user_config, filepath.name)) + if OmegaConf.select(cli_conf, "db.driver") == "clickhousedb+connect": + for col in OmegaConf.select(new_file_config, "columns"): + col.type = TYPE_MAPPING_CH.get(col.type, col.type) + OmegaConf.save(new_file_config, filepath) + + if ".zip" not in outfile_name: + raise ValueError(f"Outfile name missing .zip extension: {outfile_name}.") + logger.info(f"Saving the archive to {outfile_name}.") + shutil.make_archive(outfile_name[: outfile_name.rindex(".zip")], format="zip", root_dir=temp_config_dir) + + return Path(outfile_name) diff --git a/dff/stats/default_extractors.py b/dff/stats/default_extractors.py new file mode 100644 index 000000000..d48cd6d26 --- /dev/null +++ b/dff/stats/default_extractors.py @@ -0,0 +1,55 @@ +""" +Default Extractors +------------------ +This module includes a pool of default extractors +that you can use out of the box. + +The default configuration for Superset dashboard leverages the data collected +by the extractors below. In order to use the default charts, +make sure that you include those functions in your pipeline. +Detailed examples can be found in the `tutorials` section. + +""" +from datetime import datetime + +from dff.script import Context +from dff.pipeline import ExtraHandlerRuntimeInfo +from .utils import get_wrapper_field + + +async def get_current_label(ctx: Context, _, info: ExtraHandlerRuntimeInfo): + """ + Extract the current label on each turn. + This function is required for running the dashboard with the default configuration. + """ + last_label = ctx.last_label + if last_label is None: + return {"flow": None, "node": None, "label": None} + return {"flow": last_label[0], "node": last_label[1], "label": ": ".join(last_label)} + + +async def get_timing_before(ctx: Context, _, info: ExtraHandlerRuntimeInfo): + """ + Extract the pipeline component's start time. 
async def get_timing_before(ctx: Context, _, info: ExtraHandlerRuntimeInfo):
    """
    Record the start time of a pipeline component.
    This function is required for running the dashboard with the default configuration.

    The value is kept in the context's `framework_states` field, which is
    cleared on every turn, so it never gets persisted to the context storage.
    """
    ctx.framework_states[get_wrapper_field(info, "time")] = datetime.now()


async def get_timing_after(ctx: Context, _, info: ExtraHandlerRuntimeInfo):  # noqa: F811
    """
    Measure the execution time of a pipeline component.
    This function is required for running the dashboard with the default configuration.

    Reads the start time stored by `get_timing_before` from the context's
    `framework_states` field (cleared each turn, never persisted) and
    returns the elapsed time rendered as a string.
    """
    launched_at = ctx.framework_states[get_wrapper_field(info, "time")]
    return {"execution_time": str(datetime.now() - launched_at)}
def grpc_mapping_translation_patch(translate_value, _, args, kwargs):
    """
    This decorator patches the `_translate_value` function
    from OpenTelemetry GRPC Log exporter module allowing the class
    to translate values of type `dict` and `NoneType`
    into the `protobuf` protocol.

    :param translate_value: The original function.
    :param args: Positional arguments of the original function.
    :param kwargs: Keyword arguments of the original function.
    """
    translated_value = args[0]
    if translated_value is None:
        # `None` has no protobuf representation; substitute an empty string.
        return AnyValue(string_value="")
    if isinstance(translated_value, Mapping):
        # Translate dict-like values entry by entry into a protobuf key-value list.
        return AnyValue(
            kvlist_value=KeyValueList(values=[_translate_key_values(str(k), v) for k, v in translated_value.items()])
        )
    # Delegate every other type to the original implementation.
    return translate_value(*args, **kwargs)


# Apply the patch as soon as this module is imported.
_wrap(otlp_exporter_grpc, "exporter._translate_value", grpc_mapping_translation_patch)
+""" +import asyncio +from typing import Collection, Optional + +from wrapt import wrap_function_wrapper, decorator +from opentelemetry.instrumentation.instrumentor import BaseInstrumentor +from opentelemetry.instrumentation.utils import unwrap +from opentelemetry.metrics import get_meter, get_meter_provider, Meter +from opentelemetry.trace import get_tracer, get_tracer_provider, Tracer +from opentelemetry._logs import get_logger, get_logger_provider, Logger, SeverityNumber +from opentelemetry.trace import SpanKind, Span +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk._logs import LoggerProvider, LogRecord +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter + +from dff.script.core.context import get_last_index +from dff.stats.utils import ( + resource, + get_wrapper_field, + set_logger_destination, + set_meter_destination, + set_tracer_destination, +) +from dff.stats import default_extractors + + +INSTRUMENTS = ["dff"] + + +class OtelInstrumentor(BaseInstrumentor): + """ + Utility class for instrumenting DFF-related functions + that implements the :py:class:`~BaseInstrumentor` interface. + :py:meth:`~instrument` and :py:meth:`~uninstrument` methods + are available to apply and revert the instrumentation effects, + e.g. enable and disable logging at runtime. + + .. code-block:: + + dff_instrumentor = OtelInstrumentor() + dff_instrumentor.instrument() + dff_instrumentor.uninstrument() + + Opentelemetry provider instances can be optionally passed to the class constructor. + Otherwise, the global logger, tracer and meter providers are leveraged. + + The class implements the :py:meth:`~__call__` method, so that + regular functions can be decorated using the class instance. + + .. 
code-block:: + + @dff_instrumentor + def function(context, pipeline, runtime_info): + ... + + :param logger_provider: Opentelemetry logger provider. Used to construct a logger instance. + :param tracer_provider: Opentelemetry tracer provider. Used to construct a tracer instance. + :parame meter_provider: Opentelemetry meter provider. Used to construct a meter instance. + """ + + def __init__(self, logger_provider=None, tracer_provider=None, meter_provider=None) -> None: + super().__init__() + self._logger_provider: Optional[LoggerProvider] = None + self._tracer_provider: Optional[TracerProvider] = None + self._meter_provider: Optional[MeterProvider] = None + self._logger: Optional[Logger] = None + self._tracer: Optional[Tracer] = None + self._meter: Optional[Meter] = None + self._configure_providers( + logger_provider=logger_provider, tracer_provider=tracer_provider, meter_provider=meter_provider + ) + + def __enter__(self): + if not self.is_instrumented_by_opentelemetry: + self.instrument() + return self + + def __exit__(self): + if self.is_instrumented_by_opentelemetry: + self.uninstrument() + + @classmethod + def from_url(cls, url: str, insecure: bool = True, timeout: Optional[int] = None): + """ + Construct an instrumentor instance using only the url of the OTLP Collector. + Inherently modifies the global provider instances adding an export destination + for the target url. + + .. code-block:: + + instrumentor = OtelInstrumentor.from_url("grpc://localhost:4317") + + :param url: Url of the running Otel Collector server. Due to limited support of HTTP protocol + by the Opentelemetry Python extension, GRPC protocol is preferred. + :param insecure: Whether non-SSL-protected connection is allowed. Defaults to True. + :param timeout: Connection timeout in seconds, optional. 
+ """ + set_logger_destination(OTLPLogExporter(endpoint=url, insecure=insecure, timeout=timeout)) + set_tracer_destination(OTLPSpanExporter(endpoint=url, insecure=insecure, timeout=timeout)) + set_meter_destination(OTLPMetricExporter(endpoint=url, insecure=insecure, timeout=timeout)) + return cls() + + def instrumentation_dependencies(self) -> Collection[str]: + """ + :meta private: + + Required libraries. Implements the Python Opentelemetry instrumentor interface. + + """ + return INSTRUMENTS + + def _instrument(self, logger_provider=None, tracer_provider=None, meter_provider=None): + if any([logger_provider, meter_provider, tracer_provider]): + self._configure_providers( + logger_provider=logger_provider, tracer_provider=tracer_provider, meter_provider=meter_provider + ) + wrap_function_wrapper(default_extractors, "get_current_label", self.__call__.__wrapped__) + wrap_function_wrapper(default_extractors, "get_timing_before", self.__call__.__wrapped__) + wrap_function_wrapper(default_extractors, "get_timing_after", self.__call__.__wrapped__) + + def _uninstrument(self, **kwargs): + unwrap(default_extractors, "get_current_label") + unwrap(default_extractors, "get_timing_before") + unwrap(default_extractors, "get_timing_after") + + def _configure_providers(self, logger_provider, tracer_provider, meter_provider): + self._logger_provider = logger_provider or get_logger_provider() + self._tracer_provider = tracer_provider or get_tracer_provider() + self._meter_provider = meter_provider or get_meter_provider() + self._logger = get_logger(__name__, None, self._logger_provider) + self._tracer = get_tracer(__name__, None, self._tracer_provider) + self._meter = get_meter(__name__, None, self._meter_provider) + + @decorator + async def __call__(self, wrapped, _, args, kwargs): + """ + Regular functions that match the :py:class:`~dff.pipeline.types.ExtraHandlerFunction` + signature can be decorated with the class instance to log the returned value. 
+ This method implements the logging procedure. + The returned value is assumed to be `dict` or `NoneType`. + Logging non-atomic values is discouraged, as they cannot be translated using + the `Protobuf` protocol. + Logging is ignored if the application is in 'uninstrumented' state. + + :param wrapped: Function to decorate. + :param args: Positional arguments of the decorated function. + :param kwargs: Keyword arguments of the decorated function. + """ + ctx, _, info = args + pipeline_component = get_wrapper_field(info) + attributes = { + "context_id": str(ctx.id), + "request_id": get_last_index(ctx.requests), + "pipeline_component": pipeline_component, + } + + result: Optional[dict] + if asyncio.iscoroutinefunction(wrapped): + result = await wrapped(ctx, _, info) + else: + result = wrapped(ctx, _, info) + + if result is None or not self.is_instrumented_by_opentelemetry: + # self.is_instrumented_by_opentelemetry allows to disable + # the decorator programmatically if + # instrumentation is disabled. + return result + + span: Span + with self._tracer.start_as_current_span(wrapped.__name__, kind=SpanKind.INTERNAL) as span: + span_ctx = span.get_span_context() + record = LogRecord( + span_id=span_ctx.span_id, + trace_id=span_ctx.trace_id, + body=result, + trace_flags=span_ctx.trace_flags, + severity_text=None, + severity_number=SeverityNumber(1), + resource=resource, + attributes=attributes, + ) + self._logger.emit(record=record) + return result diff --git a/dff/stats/utils.py b/dff/stats/utils.py new file mode 100644 index 000000000..9f0426864 --- /dev/null +++ b/dff/stats/utils.py @@ -0,0 +1,203 @@ +""" +Utils +----- +This module includes utility functions designed for statistics collection. + +The functions below can be used to configure the opentelemetry destination. + +.. 
code:: python + + set_logger_destination(OTLPLogExporter("grpc://localhost:4317", insecure=True)) + set_tracer_destination(OTLPSpanExporter("grpc://localhost:4317", insecure=True)) + +""" +import json +import getpass +from urllib import parse +from typing import Optional, Tuple +from argparse import Namespace, Action + +import requests +from . import exporter_patch # noqa: F401 +from opentelemetry.sdk.resources import Resource +from opentelemetry._logs import get_logger_provider, set_logger_provider +from opentelemetry.trace import get_tracer_provider, set_tracer_provider +from opentelemetry.metrics import get_meter_provider, set_meter_provider +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk._logs import LoggerProvider +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader +from opentelemetry.sdk._logs.export import BatchLogRecordProcessor +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter, SpanExporter +from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter, LogExporter +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter, MetricExporter + +from dff.pipeline import ExtraHandlerRuntimeInfo + +SERVICE_NAME = "dialog_flow_framework" + +resource = Resource.create({"service.name": SERVICE_NAME}) +""" +Singletone :py:class:`~Resource` instance shared inside the framework. +""" +tracer_provider = TracerProvider(resource=resource) +""" +Global tracer provider bound to the DFF resource. +""" +logger_provider = LoggerProvider(resource=resource) +""" +Global logger provider bound to the DFF resource. 
+""" +set_logger_provider(logger_provider) +set_tracer_provider(tracer_provider) + + +def set_logger_destination(exporter: Optional[LogExporter] = None): + """ + Configure the global Opentelemetry logger provider to export logs to the given destination. + + :param exporter: Opentelemetry log exporter instance. + """ + if exporter is None: + exporter = OTLPLogExporter(endpoint="grpc://localhost:4317", insecure=True) + get_logger_provider().add_log_record_processor(BatchLogRecordProcessor(exporter)) + + +def set_meter_destination(exporter: Optional[MetricExporter] = None): + """ + Configure the global Opentelemetry meter provider to export metrics to the given destination. + + :param exporter: Opentelemetry meter exporter instance. + """ + if exporter is None: + exporter = OTLPMetricExporter(endpoint="grpc://localhost:4317", insecure=True) + cur_meter_provider = get_meter_provider() + new_meter_provider = MeterProvider(resource=resource, metric_readers=[PeriodicExportingMetricReader(exporter)]) + if not isinstance(cur_meter_provider, MeterProvider): + set_meter_provider(new_meter_provider) + + +def set_tracer_destination(exporter: Optional[SpanExporter] = None): + """ + Configure the global Opentelemetry tracer provider to export traces to the given destination. + + :param exporter: Opentelemetry span exporter instance. + """ + if exporter is None: + exporter = OTLPSpanExporter(endpoint="grpc://localhost:4317", insecure=True) + get_tracer_provider().add_span_processor(BatchSpanProcessor(exporter)) + + +def get_wrapper_field(info: ExtraHandlerRuntimeInfo, postfix: str = "") -> str: + """ + This function can be used to obtain a key, under which the wrapper data will be stored + in the context. + + :param info: Handler runtime info obtained from the pipeline. + :param postfix: Field-specific postfix that will be appended to the field name. 
+ """ + path = info.component.path.replace(".", "-") + return f"{path}" + (f"-{postfix}" if postfix else "") + + +def get_superset_session(args: Namespace, base_url: str = "http://localhost:8088/") -> Tuple[requests.Session, dict]: + """ + Utility function for authorized interaction with Superset HTTP API. + + :param args: Command line arguments including Superset username and Superset password. + :param base_url: Base Superset URL. + + :return: Authorized session - authorization headers tuple. + """ + healthcheck_url = parse.urljoin(base_url, "/healthcheck") + login_url = parse.urljoin(base_url, "/api/v1/security/login") + csrf_url = parse.urljoin(base_url, "/api/v1/security/csrf_token/") + + session = requests.Session() + # do healthcheck + response = session.get(healthcheck_url, timeout=10) + response.raise_for_status() + # get access token + access_request = session.post( + login_url, + headers={"Content-Type": "application/json", "Accept": "*/*"}, + data=json.dumps({"username": args.username, "password": args.password, "refresh": True, "provider": "db"}), + ) + access_token = access_request.json()["access_token"] + # get csrf_token + csrf_request = session.get(csrf_url, headers={"Authorization": f"Bearer {access_token}"}) + csrf_token = csrf_request.json()["result"] + headers = { + "Authorization": f"Bearer {access_token}", + "X-CSRFToken": csrf_token, + } + return session, headers + + +def drop_superset_assets(session: requests.Session, headers: dict, base_url: str): + """ + Drop the existing assets from the Superset dashboard. + + :param session: Authorized Superset session. + :param headers: Superset session headers. + :param base_url: Base Superset URL. 
+ """ + dashboard_url = parse.urljoin(base_url, "/api/v1/dashboard") + charts_url = parse.urljoin(base_url, "/api/v1/chart") + datasets_url = parse.urljoin(base_url, "/api/v1/dataset") + database_url = parse.urljoin(base_url, "/api/v1/database/") + delete_res: requests.Response + + dashboard_res = session.get(dashboard_url, headers=headers) + dashboard_json = dashboard_res.json() + if dashboard_json["count"] > 0: + delete_res = requests.delete(dashboard_url, params={"q": json.dumps(dashboard_json["ids"])}, headers=headers) + delete_res.raise_for_status() + + charts_result = session.get(charts_url, headers=headers) + charts_json = charts_result.json() + if charts_json["count"] > 0: + delete_res = requests.delete(charts_url, params={"q": json.dumps(charts_json["ids"])}, headers=headers) + delete_res.raise_for_status() + + datasets_result = session.get(datasets_url, headers=headers) + datasets_json = datasets_result.json() + if datasets_json["count"] > 0: + delete_res = requests.delete(datasets_url, params={"q": json.dumps(datasets_json["ids"])}, headers=headers) + delete_res.raise_for_status() + + database_res = session.get(database_url, headers=headers) + database_json = database_res.json() + if database_json["count"] > 0: + delete_res = requests.delete(database_url + str(database_json["ids"][-1]), headers=headers) + delete_res.raise_for_status() + + +class PasswordAction(Action): + """ + Child class for Argparse's :py:class:`~Action` that prompts users for passwords interactively, + ensuring password safety, unless the password is specified directly. 
class PasswordAction(Action):
    """
    Child class for Argparse's :py:class:`~Action` that prompts users for passwords interactively,
    ensuring password safety, unless the password is specified directly.

    """

    def __init__(
        self, option_strings, dest=None, nargs=0, default=None, required=False, type=None, metavar=None, help=None
    ):
        # Forward everything to the base Action; no extra state is kept.
        super().__init__(
            option_strings=option_strings,
            dest=dest,
            nargs=nargs,
            type=type,
            default=default,
            required=required,
            metavar=metavar,
            help=help,
        )

    def __call__(self, parser, args, values, option_string=None):
        if not values:
            # No value on the command line: prompt without echoing the input.
            setattr(args, self.dest, getpass.getpass(prompt=f"{self.dest}: "))
            return
        print(f"{self.dest}: setting passwords explicitly through the command line is discouraged.")
        setattr(args, self.dest, values)
\ No newline at end of file diff --git a/dff/utils/docker/dockerfile_stats b/dff/utils/docker/dockerfile_stats new file mode 100644 index 000000000..5017fd975 --- /dev/null +++ b/dff/utils/docker/dockerfile_stats @@ -0,0 +1,6 @@ +FROM apache/superset:2.1.0rc1 +USER root +RUN cd /app && pip install .[clickhouse] +COPY entrypoint_stats.sh /app/docker/ +USER superset +ENTRYPOINT ["/bin/bash","/app/docker/entrypoint_stats.sh"] \ No newline at end of file diff --git a/dff/utils/docker/entrypoint_stats.sh b/dff/utils/docker/entrypoint_stats.sh new file mode 100644 index 000000000..663a39b75 --- /dev/null +++ b/dff/utils/docker/entrypoint_stats.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +export SERVER_THREADS_AMOUNT=8 +set -m +nohup /bin/bash /usr/bin/run-server.sh & +superset fab create-admin --firstname superset --lastname admin --username $SUPERSET_USERNAME --email admin@admin.com --password $SUPERSET_PASSWORD +superset db upgrade +superset init +fg \ No newline at end of file diff --git a/dff/utils/otel/otelcol-config-extras.yml b/dff/utils/otel/otelcol-config-extras.yml new file mode 100644 index 000000000..240bb5174 --- /dev/null +++ b/dff/utils/otel/otelcol-config-extras.yml @@ -0,0 +1,30 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 +# extra settings to be merged into OpenTelemetry Collector configuration +# do not delete this file +exporters: + clickhouse: + endpoint: tcp://clickhouse:9000 + database: test + username: username + password: pass + logs_table_name: otel_logs + traces_table_name: otel_traces + metrics_table_name: otel_metrics + timeout: 5s + retry_on_failure: + enabled: true + initial_interval: 5s + max_interval: 30s + max_elapsed_time: 300s + +service: + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [logging, clickhouse] + logs: + receivers: [otlp] + processors: [batch] + exporters: [logging, clickhouse] \ No newline at end of file diff --git a/dff/utils/otel/otelcol-config.yml 
b/dff/utils/otel/otelcol-config.yml new file mode 100644 index 000000000..f152c8c35 --- /dev/null +++ b/dff/utils/otel/otelcol-config.yml @@ -0,0 +1,40 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + + +receivers: + otlp: + protocols: + grpc: + http: + cors: + allowed_origins: + - "http://*" + - "https://*" + +exporters: + logging: + +processors: + batch: + filter: + metrics: + exclude: + match_type: strict + metric_names: + - queueSize + +service: + pipelines: + metrics: + receivers: [otlp] + processors: [filter, batch] + exporters: [logging] + logs: + receivers: [otlp] + processors: [batch] + exporters: [logging] + traces: + receivers: [otlp] + processors: [batch] + exporters: [logging] \ No newline at end of file diff --git a/dff/utils/testing/telegram.py b/dff/utils/testing/telegram.py index 720dce6a6..d06ba3bb6 100644 --- a/dff/utils/testing/telegram.py +++ b/dff/utils/testing/telegram.py @@ -59,7 +59,7 @@ class TelegramTesting: # pragma: no cover :param pipeline: Pipeline with the telegram messenger interface. Required for :py:meth:`~dff.utils.testing.telegram.TelegramTesting.send_and_check` and - :py:meth`~dff.utils.testing.telegram.TelegramTesting.check_happy_path` with `run_bot=True` + :py:meth:`~dff.utils.testing.telegram.TelegramTesting.check_happy_path` with `run_bot=True` :param api_credentials: Telegram API id and hash. Obtainable via https://core.telegram.org/api/obtaining_api_id. 
diff --git a/docker-compose.yml b/docker-compose.yml index dc1f5bca7..9c33c632a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -34,3 +34,35 @@ services: # - 2135:2135 - 2136:2136 hostname: localhost + dashboard: + env_file: [.env_file] + build: + context: ./dff/utils/docker + dockerfile: dockerfile_stats + image: ghcr.io/deeppavlov/superset_df_dashboard:latest + ports: + - "8088:8088" + clickhouse: + env_file: [.env_file] + image: clickhouse/clickhouse-server:latest + restart: unless-stopped + ports: + - '8123:8123' + - '8443:8443' + - '9000:9000' + volumes: + - ch-data:/var/lib/clickhouse/ + otelcol: + image: otel/opentelemetry-collector-contrib:latest + container_name: otel-col + restart: unless-stopped + command: [ "--config=/etc/otelcol-config.yml", "--config=/etc/otelcol-config-extras.yml" ] + volumes: + - ./dff/utils/otel/otelcol-config.yml:/etc/otelcol-config.yml:ro + - ./dff/utils/otel/otelcol-config-extras.yml:/etc/otelcol-config-extras.yml:ro + ports: + - "4317:4317" # OTLP over gRPC receiver + - "4318:4318" # OTLP over HTTP receiver +volumes: + ch-data: + name: "ch-data" diff --git a/docs/source/_static/images/additional_stats.png b/docs/source/_static/images/additional_stats.png new file mode 100644 index 000000000..2b9d2c3f5 Binary files /dev/null and b/docs/source/_static/images/additional_stats.png differ diff --git a/docs/source/_static/images/databases.png b/docs/source/_static/images/databases.png new file mode 100644 index 000000000..167c4ce61 Binary files /dev/null and b/docs/source/_static/images/databases.png differ diff --git a/docs/source/_static/images/general_stats.png b/docs/source/_static/images/general_stats.png new file mode 100644 index 000000000..660f2ed89 Binary files /dev/null and b/docs/source/_static/images/general_stats.png differ diff --git a/docs/source/_static/images/overview.png b/docs/source/_static/images/overview.png new file mode 100644 index 000000000..5bd0fb6a1 Binary files /dev/null and 
b/docs/source/_static/images/overview.png differ diff --git a/docs/source/_static/images/service_stats.png b/docs/source/_static/images/service_stats.png new file mode 100644 index 000000000..7ce724286 Binary files /dev/null and b/docs/source/_static/images/service_stats.png differ diff --git a/docs/source/conf.py b/docs/source/conf.py index 4bc3731dd..47ee31a1b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -170,6 +170,7 @@ def setup(_): ], ), ("tutorials.utils", "Utils"), + ("tutorials.stats", "Stats"), ] ) regenerate_apiref( @@ -178,6 +179,7 @@ def setup(_): ("dff.messengers", "Messenger Interfaces"), ("dff.pipeline", "Pipeline"), ("dff.script", "Script"), + ("dff.stats", "Stats"), ("dff.utils.testing", "Utils"), ] ) diff --git a/docs/source/user_guides.rst b/docs/source/user_guides.rst index cdb90a035..8724a1489 100644 --- a/docs/source/user_guides.rst +++ b/docs/source/user_guides.rst @@ -9,8 +9,17 @@ those include but are not limited to: dialog graph creation, specifying start an setting transitions and conditions, using ``Context`` object in order to receive information about current script execution. +:doc:`Superset guide <./user_guides/superset_guide>` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``superset guide`` tutorial highlights the usage of Superset visualization tool +for exploring the telemetry data collected from your conversational services. +We show how to plug in the telemetry collection and configure the pre-built +Superset dashboard shipped with DFF. + .. 
toctree:: :hidden: user_guides/basic_conceptions + user_guides/superset_guide diff --git a/docs/source/user_guides/superset_guide.rst b/docs/source/user_guides/superset_guide.rst new file mode 100644 index 000000000..5add23bde --- /dev/null +++ b/docs/source/user_guides/superset_guide.rst @@ -0,0 +1,118 @@ +Superset guide +--------------------- + +Description +~~~~~~~~~~~ + +| The Dialog Flow Stats module can be used to obtain and visualize usage statistics for your service. +| The module relies on several open source solutions that allow for data persistence and visualization + +* `Clickhouse <https://clickhouse.com/>`_ serves as an OLAP storage for data. +* Batching and preprocessing data is based on `OpenTelemetry protocol <https://opentelemetry.io/docs/specs/otlp/>`_ and the `OpenTelemetry collector <https://opentelemetry.io/docs/collector/>`_. +* Interactive visualization is powered by `Apache Superset <https://superset.apache.org/>`_. + +All the mentioned services are shipped as Docker containers, including a pre-built Superset image that ensures API compatibility. + +Collection procedure +~~~~~~~~~~~~~~~~~~~~ + +**Installation** + +.. code-block:: shell + + pip install dff[stats] + +**Launching services** + +.. code-block:: shell + :linenos: + + # clone the original repository to access the docker-compose file + git clone https://github.com/deeppavlov/dialog_flow_framework.git + # launch the required services + cd dialog_flow_framework + docker-compose up otelcol clickhouse dashboard + +**Collecting data** + +Collecting data is done by means of instrumenting your conversational service before you run it. +DFF tutorials showcase all the necessary steps, needed to achieve that. We will run +`one of those files <../tutorials/tutorials.stats.1_extractor_functions.py>`_ +in order to obtain sample data points to visualize. + +..
code-block:: shell + + export DISABLE_INTERACTIVE_MODE=1 && python tutorials/stats/1_extractor_functions.py + +Displaying the data +~~~~~~~~~~~~~~~~~~~ + +In order to display the Superset dashboard, you should update the default configuration with the credentials of your database. +The configuration can be optionally saved as a zip archive for inspection / debug. + +You can set most of the configuration options using a YAML file. +The default example file can be found in the `tutorials/stats` directory: + +.. code-block:: yaml + :linenos: + + # tutorials/stats/example_config.yaml + db: + driver: clickhousedb+connect + name: test + user: username + host: clickhouse + port: 8123 + table: otel_logs + +The file can then be used to parametrize the configuration script. + +.. code-block:: shell + + dff.stats tutorials/stats/example_config.yaml -P superset -dP pass -U superset --outfile=config_artifact.zip + +.. warning:: + + Here we passed passwords via CLI, which is not recommended. For enhanced security, call the command above omitting the passwords (`dff.stats -P -dP -U superset ...`) and you will be prompted to enter them interactively. + +Running the command will automatically import the dashboard as well as the data sources +into the running superset server. If you are using a version of Superset different from the one +shipped with DFF, make sure that your access rights are sufficient to edit the workspace. + +Using Superset +~~~~~~~~~~~~~~ + +| In order to view the imported dashboard, log into `Superset `_ using your username and password (which are both `superset` by default and can be configured via `.env_file`). +| The dashboard will then be available in the **Dashboards** section of the Superset UI under the name of **DFF stats**. +| The dashboard has four sections, each one of them containing different kind of data. + +* The **Overview** section summarizes the information about user interaction with your script. 
And displays a weighted graph of transitions from one node to another. The data is also shown in the form of a table for better introspection capabilities. + +.. figure:: ../_static/images/overview.png + + Overview plots. + +* The data displayed in the **General stats** section reports how frequently each of the nodes in your script was visited by users. The information is aggregated in several forms for better interpretability. + +.. figure:: ../_static/images/general_stats.png + + General stats plots. + +* The **Additional stats** section includes charts for node visit counts aggregated over various specific variables. + +.. figure:: ../_static/images/additional_stats.png + + Additional stats plots. + +* General service load data aggregated over time can be found in the **Service stats** section. + +.. figure:: ../_static/images/service_stats.png + + Service stats plots. + +On some occasions, Superset can show warnings about the database connection being faulty. +In that case, you can navigate to the `Database Connections` section through the `Settings` menu and edit the `dff_database` instance updating the credentials. + +.. figure:: ../_static/images/databases.png + + Locate the database settings in the right corner of the screen.
\ No newline at end of file diff --git a/setup.py b/setup.py index 70358188a..cdafaaec7 100644 --- a/setup.py +++ b/setup.py @@ -79,6 +79,32 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]: "pytelegrambotapi", ] +requests_requirements = [ + "requests==2.31.0", +] + +otl_dependencies = [ + "opentelemetry-api==1.17.0", + "opentelemetry-exporter-otlp==1.17.0", + "opentelemetry-exporter-otlp-proto-grpc==1.17.0", + "opentelemetry-exporter-otlp-proto-http==1.17.0", + "opentelemetry-instrumentation==0.38b0", + "opentelemetry-proto==1.17.0", + "opentelemetry-sdk==1.17.0", + "opentelemetry-semantic-conventions==0.38b0", +] + +stats_dependencies = merge_req_lists( + _sql_dependencies, + requests_requirements, + otl_dependencies, + [ + "wrapt==1.15.0", + "tqdm==4.62.3", + "omegaconf>=2.2.2", + ], +) + full = merge_req_lists( core, async_files_dependencies, @@ -88,13 +114,10 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]: mysql_dependencies, postgresql_dependencies, ydb_dependencies, + stats_dependencies, telegram_dependencies, ) -requests_requirements = [ - "requests==2.31.0", -] - test_requirements = merge_req_lists( [ "pytest==7.4.0", @@ -105,6 +128,9 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]: "click==8.1.3", "black==23.7.0", "isort==5.12.0", + "aiochclient>=2.2.0", + "httpx<=0.23.0", + "sqlparse==0.4.4", ], requests_requirements, ) @@ -162,6 +188,7 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]: mypy_dependencies, ) + EXTRA_DEPENDENCIES = { "core": core, # minimal dependencies (by default) "json": async_files_dependencies, # dependencies for using JSON @@ -172,6 +199,7 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]: "mysql": mysql_dependencies, # dependencies for using MySQL "postgresql": postgresql_dependencies, # dependencies for using PostgreSQL "ydb": ydb_dependencies, # dependencies for using Yandex Database + "stats": stats_dependencies, # dependencies for statistics collection "telegram": 
telegram_dependencies, # dependencies for using Telegram "full": full, # full dependencies including all options above "tests": test_requirements, # dependencies for running tests @@ -210,4 +238,5 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]: install_requires=core, test_suite="tests", extras_require=EXTRA_DEPENDENCIES, + entry_points={"console_scripts": ["dff.stats=dff.stats.__main__:main"]}, ) diff --git a/tests/stats/__init__.py b/tests/stats/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/stats/conftest.py b/tests/stats/conftest.py new file mode 100644 index 000000000..4fbee4ceb --- /dev/null +++ b/tests/stats/conftest.py @@ -0,0 +1,65 @@ +import pytest + +try: + from dff.stats import InMemoryLogExporter, InMemorySpanExporter, OTLPLogExporter, OTLPSpanExporter + from dff.stats.instrumentor import resource + from opentelemetry.sdk._logs.export import BatchLogRecordProcessor + from opentelemetry.sdk.trace.export import BatchSpanProcessor + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk._logs import LoggerProvider + + opentelemetry_available = True +except ImportError: + opentelemetry_available = False + + +@pytest.fixture(scope="function") +def tracer_exporter_and_provider(): + if not opentelemetry_available: + pytest.skip("One of the Opentelemetry packages is missing.") + exporter = InMemorySpanExporter() + tracer_provider = TracerProvider(resource=resource) + tracer_provider.add_span_processor(BatchSpanProcessor(exporter, schedule_delay_millis=900)) + yield exporter, tracer_provider + + +@pytest.fixture(scope="function") +def log_exporter_and_provider(): + if not opentelemetry_available: + pytest.skip("One of the Opentelemetry packages is missing.") + exporter = InMemoryLogExporter() + logger_provider = LoggerProvider(resource=resource) + logger_provider.add_log_record_processor(BatchLogRecordProcessor(exporter, schedule_delay_millis=900)) + yield exporter, logger_provider + + 
+@pytest.fixture(scope="function") +def otlp_trace_exp_provider(): + if not opentelemetry_available: + pytest.skip("One of the Opentelemetry packages is missing.") + exporter = OTLPSpanExporter("grpc://localhost:4317", insecure=True) + tracer_provider = TracerProvider(resource=resource) + tracer_provider.add_span_processor(BatchSpanProcessor(exporter, schedule_delay_millis=900)) + yield exporter, tracer_provider + + +@pytest.fixture(scope="function") +def otlp_log_exp_provider(): + if not opentelemetry_available: + pytest.skip("One of the Opentelemetry packages is missing.") + exporter = OTLPLogExporter("grpc://localhost:4317", insecure=True) + logger_provider = LoggerProvider(resource=resource) + logger_provider.add_log_record_processor(BatchLogRecordProcessor(exporter, schedule_delay_millis=900)) + yield exporter, logger_provider + + +@pytest.fixture(scope="session") # test saving configs to zip +def testing_cfg_dir(tmpdir_factory): + cfg_dir = tmpdir_factory.mktemp("cfg") + yield str(cfg_dir) + + +@pytest.fixture(scope="function") # test saving to csv +def testing_file(tmpdir_factory): + fn = tmpdir_factory.mktemp("data").join("stats.csv") + return str(fn) diff --git a/tests/stats/test_defaults.py b/tests/stats/test_defaults.py new file mode 100644 index 000000000..11c525723 --- /dev/null +++ b/tests/stats/test_defaults.py @@ -0,0 +1,52 @@ +import pytest + +try: + from wrapt import wrap_function_wrapper # noqa: F401 + from dff.stats import OtelInstrumentor +except ImportError: + pytest.skip(allow_module_level=True, reason="One of the Opentelemetry packages is missing.") + +from dff.script import Context +from dff.pipeline.types import ExtraHandlerRuntimeInfo, ServiceRuntimeInfo +from dff.stats import default_extractors + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "context,expected", + [ + (Context(), set()), + (Context(labels={0: ("a", "b")}), {("flow", "a"), ("node", "b"), ("label", "a: b")}), + ], +) +async def test_get_current_label(context: 
Context, expected: set): + result = await default_extractors.get_current_label(context, None, {"component": {"path": "."}}) + assert expected.intersection(set(result.items())) == expected + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "context,expected", + [ + (Context(), set()), + (Context(labels={0: ("a", "b")}), {("flow", "a"), ("node", "b"), ("label", "a: b")}), + ], +) +async def test_otlp_integration(context, expected, tracer_exporter_and_provider, log_exporter_and_provider): + _, tracer_provider = tracer_exporter_and_provider + log_exporter, logger_provider = log_exporter_and_provider + instrumentor = OtelInstrumentor() + if instrumentor.is_instrumented_by_opentelemetry: + instrumentor.uninstrument() + instrumentor.instrument(logger_provider=logger_provider, tracer_provider=tracer_provider) + runtime_info = ExtraHandlerRuntimeInfo( + func=lambda x: x, + stage="BEFORE", + component=ServiceRuntimeInfo( + path=".", name=".", timeout=None, asynchronous=False, execution_state={".": "FINISHED"} + ), + ) + _ = await default_extractors.get_current_label(context, None, runtime_info) + tracer_provider.force_flush() + logger_provider.force_flush() + assert len(log_exporter.get_finished_logs()) > 0 diff --git a/tests/stats/test_instrumentation.py b/tests/stats/test_instrumentation.py new file mode 100644 index 000000000..67356bc08 --- /dev/null +++ b/tests/stats/test_instrumentation.py @@ -0,0 +1,38 @@ +import pytest + +try: + from dff.stats import default_extractors + from dff.stats import OtelInstrumentor + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk._logs import LoggerProvider + from opentelemetry.sdk.metrics import MeterProvider + from opentelemetry.trace import get_tracer_provider + from opentelemetry.metrics import get_meter_provider + from opentelemetry._logs import get_logger_provider +except ImportError: + pytest.skip(allow_module_level=True, reason="One of the Opentelemetry packages is missing.") + + +def 
test_instrument_uninstrument(): + instrumentor = OtelInstrumentor() + instrumentor.instrument() + assert hasattr(default_extractors.get_current_label, "__wrapped__") + assert hasattr(default_extractors.get_timing_before, "__wrapped__") + assert hasattr(default_extractors.get_timing_after, "__wrapped__") + instrumentor.uninstrument() + assert not hasattr(default_extractors.get_current_label, "__wrapped__") + assert not hasattr(default_extractors.get_timing_before, "__wrapped__") + assert not hasattr(default_extractors.get_timing_after, "__wrapped__") + + +def test_keyword_arguments(): + instrumentor = OtelInstrumentor() + assert instrumentor._meter_provider is get_meter_provider() + assert instrumentor._logger_provider is get_logger_provider() + assert instrumentor._tracer_provider is get_tracer_provider() + instrumentor.instrument( + tracer_provider=TracerProvider(), meter_provider=MeterProvider(), logger_provider=LoggerProvider() + ) + assert instrumentor._meter_provider is not get_meter_provider() + assert instrumentor._logger_provider is not get_logger_provider() + assert instrumentor._tracer_provider is not get_tracer_provider() diff --git a/tests/stats/test_main.py b/tests/stats/test_main.py new file mode 100644 index 000000000..ac72fbf9b --- /dev/null +++ b/tests/stats/test_main.py @@ -0,0 +1,151 @@ +import os +import pytest +from urllib import parse +from zipfile import ZipFile +from argparse import Namespace + +try: + import omegaconf # noqa: F401 + from dff.stats.__main__ import main + from dff.stats.cli import DEFAULT_SUPERSET_URL, DASHBOARD_SLUG + from dff.stats.utils import get_superset_session, drop_superset_assets +except ImportError: + pytest.skip(reason="`OmegaConf` dependency missing.", allow_module_level=True) + +from tests.context_storages.test_dbs import ping_localhost +from tests.test_utils import get_path_from_tests_to_current_dir + +SUPERSET_ACTIVE = ping_localhost(8088) +path_to_addon = get_path_from_tests_to_current_dir(__file__) + + +def 
dashboard_display_test(args: Namespace, session, headers, base_url: str): + dashboard_url = parse.urljoin(base_url, f"/api/v1/dashboard/{DASHBOARD_SLUG}") + charts_url = parse.urljoin(base_url, "/api/v1/chart") + datasets_url = parse.urljoin(base_url, "/api/v1/dataset") + database_conn_url = parse.urljoin(base_url, "/api/v1/database/test_connection") + db_driver, db_user, db_password, db_host, db_port, db_name = ( + getattr(args, "db.driver"), + getattr(args, "db.user"), + getattr(args, "db.password"), + getattr(args, "db.host"), + getattr(args, "db.port"), + getattr(args, "db.name"), + ) + sqla_url = f"{db_driver}://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}" + database_data = { + "configuration_method": "sqlalchemy_form", + "database_name": "dff_database", + "driver": "string", + "engine": None, + "extra": "", + "impersonate_user": False, + "masked_encrypted_extra": "", + "parameters": {}, + "server_cert": None, + "sqlalchemy_uri": sqla_url, + "ssh_tunnel": None, + } + + database_res = session.post(database_conn_url, json=database_data, headers=headers) + assert database_res.status_code == 200 + dashboard_res = session.get(dashboard_url, headers=headers) + assert dashboard_res.status_code == 200 + dashboard_json = dashboard_res.json() + assert sorted(dashboard_json["result"]["charts"]) == [ + "Flow visit ratio monitor", + "Node Visits", + "Node counts", + "Node visit ratio monitor", + "Node visits [cloud]", + "Node visits [ratio]", + "Node visits [sunburst]", + "Service load [users]", + "Table", + "Terminal labels", + "Transition counts", + "Transition layout", + "Transition ratio [chord]", + ] + assert dashboard_json["result"]["url"] == "/superset/dashboard/dff-stats/" + assert dashboard_json["result"]["dashboard_title"] == "DFF Stats" + datasets_result = session.get(datasets_url, headers=headers) + datasets_json = datasets_result.json() + assert datasets_json["count"] == 2 + assert datasets_json["ids"] == [1, 2] + assert [item["id"] for item in 
datasets_json["result"]] == [1, 2] + assert sorted([item["table_name"] for item in datasets_json["result"]]) == [ + "dff_final_nodes", + "dff_node_stats", + ] + charts_result = session.get(charts_url, headers=headers) + charts_json = charts_result.json() + assert charts_json["count"] == 14 + assert sorted(charts_json["ids"]) == list(range(1, 15)) + session.close() + + +@pytest.mark.skipif(not SUPERSET_ACTIVE, reason="Superset server not active") +@pytest.mark.parametrize( + ["args"], + [ + ( + Namespace( + **{ + "outfile": "1.zip", + "db.driver": "clickhousedb+connect", + "db.host": "clickhouse", + "db.port": "8123", + "db.name": "test", + "db.table": "otel_logs", + "host": "localhost", + "port": "8088", + "file": f"tutorials/{path_to_addon}/example_config.yaml", + } + ), + ), + ], +) +@pytest.mark.docker +def test_main(testing_cfg_dir, args): + args.__dict__.update( + { + "db.password": os.environ["CLICKHOUSE_PASSWORD"], + "username": os.environ["SUPERSET_USERNAME"], + "password": os.environ["SUPERSET_PASSWORD"], + "db.user": os.environ["CLICKHOUSE_USER"], + } + ) + args.outfile = testing_cfg_dir + args.outfile + session, headers = get_superset_session(args, DEFAULT_SUPERSET_URL) + dashboard_url = parse.urljoin(DEFAULT_SUPERSET_URL, "/api/v1/dashboard/") + + drop_superset_assets(session, headers, DEFAULT_SUPERSET_URL) + dashboard_result = session.get(dashboard_url, headers=headers) + dashboard_json = dashboard_result.json() + assert dashboard_json["count"] == 0 + + main(args) + dashboard_display_test(args, session, headers, base_url=DEFAULT_SUPERSET_URL) + assert os.path.exists(args.outfile) + assert os.path.isfile(args.outfile) + assert os.path.getsize(args.outfile) > 2200 + with ZipFile(args.outfile) as file: + file.extractall(testing_cfg_dir) + database = omegaconf.OmegaConf.load(os.path.join(testing_cfg_dir, "superset_dashboard/databases/dff_database.yaml")) + sqlalchemy_uri = omegaconf.OmegaConf.select(database, "sqlalchemy_uri") + arg_vars = vars(args) + 
driver, user, host, port, name = ( + arg_vars["db.driver"], + arg_vars["db.user"], + arg_vars["db.host"], + arg_vars["db.port"], + arg_vars["db.name"], + ) + assert sqlalchemy_uri == f"{driver}://{user}:XXXXXXXXXX@{host}:{port}/{name}" + + +@pytest.mark.parametrize(["cmd"], [("dff.stats -h",), ("dff.stats --help",)]) +def test_help(cmd): + res = os.system(cmd) + assert res == 0 diff --git a/tests/stats/test_patch.py b/tests/stats/test_patch.py new file mode 100644 index 000000000..284ea0b69 --- /dev/null +++ b/tests/stats/test_patch.py @@ -0,0 +1,19 @@ +import pytest + +try: + from dff import stats # noqa: F401 + from opentelemetry.proto.common.v1.common_pb2 import AnyValue + from opentelemetry.exporter.otlp.proto.grpc.exporter import _translate_value +except ImportError: + pytest.skip(allow_module_level=True, reason="One of the Opentelemetry packages is missing.") + + +@pytest.mark.parametrize( + ["value", "expected_field"], [(1, "int_value"), ({"a": "b"}, "kvlist_value"), (None, "string_value")] +) +def test_body_translation(value, expected_field): + assert _translate_value.__wrapped__.__name__ == "_translate_value" + translated_value = _translate_value(value) + assert isinstance(translated_value, AnyValue) + assert translated_value.IsInitialized() + assert getattr(translated_value, expected_field, None) is not None diff --git a/tests/stats/test_tutorials.py b/tests/stats/test_tutorials.py new file mode 100644 index 000000000..969dc6bf5 --- /dev/null +++ b/tests/stats/test_tutorials.py @@ -0,0 +1,90 @@ +import os +import importlib +import pytest +import asyncio + +from tests.test_utils import get_path_from_tests_to_current_dir +from tests.context_storages.test_dbs import ping_localhost +from dff.utils.testing.common import check_happy_path +from dff.utils.testing.toy_script import HAPPY_PATH + +try: + from aiochclient import ChClient + from httpx import AsyncClient + from dff import stats # noqa: F401 +except ImportError: + pytest.skip(allow_module_level=True, 
reason="There are dependencies missing.") + + +dot_path_to_addon = get_path_from_tests_to_current_dir(__file__) + + +COLLECTOR_AVAILABLE = ping_localhost(4317) +CLICKHOUSE_AVAILABLE = ping_localhost(8123) +CLICKHOUSE_USER = os.getenv("CLICKHOUSE_USER") +CLICKHOUSE_PASSWORD = os.getenv("CLICKHOUSE_PASSWORD") +CLICKHOUSE_DB = os.getenv("CLICKHOUSE_DB") + + +@pytest.mark.skipif(not CLICKHOUSE_AVAILABLE, reason="Clickhouse unavailable.") +@pytest.mark.skipif(not COLLECTOR_AVAILABLE, reason="OTLP collector unavailable.") +@pytest.mark.skipif( + not all([CLICKHOUSE_USER, CLICKHOUSE_PASSWORD, CLICKHOUSE_DB]), reason="Clickhouse credentials missing" +) +@pytest.mark.asyncio +@pytest.mark.parametrize( + ["example_module_name", "expected_logs"], + [ + ("1_extractor_functions", 10), + ("2_pipeline_integration", 35), + ], +) +@pytest.mark.docker +async def test_examples_ch(example_module_name: str, expected_logs, otlp_log_exp_provider, otlp_trace_exp_provider): + module = importlib.import_module(f"tutorials.{dot_path_to_addon}.{example_module_name}") + _, tracer_provider = otlp_trace_exp_provider + _, logger_provider = otlp_log_exp_provider + http_client = AsyncClient() + table = "otel_logs" + ch_client = ChClient(http_client, user=CLICKHOUSE_USER, password=CLICKHOUSE_PASSWORD, database=CLICKHOUSE_DB) + + try: + await ch_client.execute(f"TRUNCATE {table}") + pipeline = module.pipeline + module.dff_instrumentor.uninstrument() + module.dff_instrumentor.instrument(logger_provider=logger_provider, tracer_provider=tracer_provider) + check_happy_path(pipeline, HAPPY_PATH) + await asyncio.sleep(1) + count = await ch_client.fetchval(f"SELECT COUNT (*) FROM {table}") + assert count == expected_logs + + except Exception as exc: + raise Exception(f"model_name=tutorials.{dot_path_to_addon}.{example_module_name}") from exc + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + ["example_module_name", "expected_logs"], + [ + ("1_extractor_functions", 10), + ("2_pipeline_integration", 35), + 
], +) +async def test_examples_memory( + example_module_name: str, expected_logs, tracer_exporter_and_provider, log_exporter_and_provider +): + module = importlib.import_module(f"tutorials.{dot_path_to_addon}.{example_module_name}") + _, tracer_provider = tracer_exporter_and_provider + log_exporter, logger_provider = log_exporter_and_provider + try: + pipeline = module.pipeline + module.dff_instrumentor.uninstrument() + module.dff_instrumentor.instrument(logger_provider=logger_provider, tracer_provider=tracer_provider) + check_happy_path(pipeline, HAPPY_PATH) + tracer_provider.force_flush() + logger_provider.force_flush() + await asyncio.sleep(1) + assert len(log_exporter.get_finished_logs()) == expected_logs + + except Exception as exc: + raise Exception(f"model_name=tutorials.{dot_path_to_addon}.{example_module_name}") from exc diff --git a/tutorials/stats/1_extractor_functions.py b/tutorials/stats/1_extractor_functions.py new file mode 100644 index 000000000..20f371ba6 --- /dev/null +++ b/tutorials/stats/1_extractor_functions.py @@ -0,0 +1,111 @@ +# %% [markdown] +""" +# 1. Extractor Functions + +The following example covers the basics of using the `stats` module. + +Statistics are collected from pipeline services by extractor functions +that report the state of one or more pipeline components. The `stats` module +provides several default extractors, but users are free to define their own +extractor functions. + +It is required that the extractors have the following uniform signature: +the expected arguments are always `Context`, `Pipeline`, and `ExtraHandlerRuntimeInfo`, +while the expected return value is an arbitrary `dict` or a `None`. It is a preferred practice +to define extractors as asynchronous functions. + +The output of these functions will be captured by an OpenTelemetry instrumentor and directed to +the Opentelemetry collector server which in its turn batches and persists data +to Clickhouse or other OLAP storages. 
+For more information on OpenTelemetry instrumentation, +refer to the body of this tutorial as well as [OpenTelemetry documentation]( +https://opentelemetry.io/docs/instrumentation/python/manual/ +). + +""" + +# %pip install dff[stats] + +# %% +import asyncio + +from dff.script import Context +from dff.pipeline import Pipeline, ACTOR, Service, ExtraHandlerRuntimeInfo, to_service +from dff.utils.testing.toy_script import TOY_SCRIPT, HAPPY_PATH +from dff.stats import OtelInstrumentor, default_extractors +from dff.utils.testing import is_interactive_mode, check_happy_path + + +# %% [markdown] +""" +The cells below configure log export with the help of OTLP instrumentation. + +* The initial step is to configure the export destination. +`from_url` method of the `OtelInstrumentor` class simplifies this task +allowing you to only pass the url of the OTLP Collector server. + +* Alternatively, you can use the utility functions provided by the `stats` module: +`set_logger_destination`, `set_tracer_destination`, or `set_meter_destination`. These accept +an appropriate Opentelemetry exporter instance and bind it to provider classes. + +* Next, the `OtelInstrumentor` class should be constructed to log the output +of extractor functions. Custom extractors can be decorated with the `OtelInstrumentor` instance. +Default extractors are instrumented by calling the `instrument` method. + +* The entirety of the process is illustrated in the example below. + +""" + + +# %% +dff_instrumentor = OtelInstrumentor.from_url("grpc://localhost:4317") +dff_instrumentor.instrument() + +# %% [markdown] +""" +The following cell shows a custom extractor function. The data obtained from +the context and the runtime information gets shaped as a dict and returned +from the function body. The `dff_instrumentor` decorator then ensures +that the output is logged by OpenTelemetry.
+ +""" + + +# %% +# decorated by an OTLP Instrumentor instance +@dff_instrumentor +async def get_service_state(ctx: Context, _, info: ExtraHandlerRuntimeInfo): + # extract the execution state of a target service + data = { + "execution_state": info.component.execution_state, + } + # return the state as an arbitrary dict for further logging + return data + + +# %% +# configure `get_service_state` to run after the `heavy_service` +@to_service(after_handler=[get_service_state]) +async def heavy_service(ctx: Context): + _ = ctx # get something from ctx if needed + await asyncio.sleep(0.02) + + +# %% +pipeline = Pipeline.from_dict( + { + "script": TOY_SCRIPT, + "start_label": ("greeting_flow", "start_node"), + "fallback_label": ("greeting_flow", "fallback_node"), + "components": [ + heavy_service, + Service(handler=ACTOR, after_handler=[default_extractors.get_current_label]), + ], + } +) + + +if __name__ == "__main__": + check_happy_path(pipeline, HAPPY_PATH) + if is_interactive_mode(): + pipeline.run() diff --git a/tutorials/stats/2_pipeline_integration.py b/tutorials/stats/2_pipeline_integration.py new file mode 100644 index 000000000..de81a3593 --- /dev/null +++ b/tutorials/stats/2_pipeline_integration.py @@ -0,0 +1,110 @@ +# %% [markdown] +""" +# 2. Pipeline Integration + +In the DFF ecosystem, extractor functions act as regular extra handlers ( +[see the pipeline module documentation](%doclink(tutorial,pipeline.7_extra_handlers_basic)) +). +Hence, you can decorate any part of your pipeline, including services, +service groups and the pipeline as a whole, to obtain the statistics +specific for that component. Some examples of this functionality +are showcased in this tutorial. 
+ +""" + +# %pip install dff[stats] + +# %% +import asyncio + +from dff.script import Context +from dff.pipeline import ( + Pipeline, + ACTOR, + Service, + ExtraHandlerRuntimeInfo, + ServiceGroup, + GlobalExtraHandlerType, +) +from dff.utils.testing.toy_script import TOY_SCRIPT, HAPPY_PATH +from dff.stats import OtelInstrumentor, set_logger_destination, set_tracer_destination +from dff.stats import OTLPLogExporter, OTLPSpanExporter +from dff.stats import default_extractors +from dff.utils.testing import is_interactive_mode, check_happy_path + +# %% +set_logger_destination(OTLPLogExporter("grpc://localhost:4317", insecure=True)) +set_tracer_destination(OTLPSpanExporter("grpc://localhost:4317", insecure=True)) +dff_instrumentor = OtelInstrumentor() +dff_instrumentor.instrument() + + +# example extractor function +@dff_instrumentor +async def get_service_state(ctx: Context, _, info: ExtraHandlerRuntimeInfo): + # extract execution state of service from info + data = { + "execution_state": info.component.execution_state, + } + # return a record to save into connected database + return data + + +# %% +# example service +async def heavy_service(ctx: Context): + _ = ctx # get something from ctx if needed + await asyncio.sleep(0.02) + + +# %% [markdown] +""" + +The many ways in which you can use extractor functions are shown in the following +pipeline definition. The functions are used to obtain statistics from respective components: + +* A service group of two `heavy_service` instances. +* An `Actor` service. +* The pipeline as a whole. + +As is the case with the regular extra handler functions, you can wire the extractors +to run either before or after the target service. As a result, you can compare +the pre-service and post-service states of the context to measure the performance +of various components, etc. 
+ +""" +# %% +pipeline = Pipeline.from_dict( + { + "script": TOY_SCRIPT, + "start_label": ("greeting_flow", "start_node"), + "fallback_label": ("greeting_flow", "fallback_node"), + "components": [ + ServiceGroup( + before_handler=[default_extractors.get_timing_before], + after_handler=[ + get_service_state, + default_extractors.get_timing_after, + ], + components=[{"handler": heavy_service}, {"handler": heavy_service}], + ), + Service( + handler=ACTOR, + before_handler=[default_extractors.get_timing_before], + after_handler=[ + get_service_state, + default_extractors.get_timing_after, + default_extractors.get_current_label, + ], + ), + ], + } +) +pipeline.add_global_handler(GlobalExtraHandlerType.BEFORE_ALL, default_extractors.get_timing_before) +pipeline.add_global_handler(GlobalExtraHandlerType.AFTER_ALL, default_extractors.get_timing_after) +pipeline.add_global_handler(GlobalExtraHandlerType.AFTER_ALL, get_service_state) + +if __name__ == "__main__": + check_happy_path(pipeline, HAPPY_PATH) + if is_interactive_mode(): + pipeline.run() diff --git a/tutorials/stats/example_config.yaml b/tutorials/stats/example_config.yaml new file mode 100644 index 000000000..f01db7d35 --- /dev/null +++ b/tutorials/stats/example_config.yaml @@ -0,0 +1,7 @@ +db: + driver: clickhousedb+connect + name: test + user: username + host: clickhouse + port: 8123 + table: otel_logs \ No newline at end of file