Skip to content

Performance python 3.11 vs 3.12

Jörg Zimmermann edited this page Nov 23, 2023 · 4 revisions

setup

  • quickstart setup in this repository with a custom config.

  • started docker-compose -f ./quickstart/docker-compose.yml up -d kafka opensearch prometheus grafana

  • same inputdata for both scenarios

  • logprep was started from the pwsh command line: $env:PYTHONPATH="."; $env:PROMETHEUS_MULTIPROC_DIR="/tmp/logprep"; python logprep/run_logprep.py ./quickstart/exampledata/config/pipeline.yml

  • same configuration for both scenarios

---
# Logprep configuration that was used unchanged for all benchmark scenarios
# described on this page.
version: 1
# NOTE(review): presumably the number of logprep worker processes — confirm.
process_count: 2
timeout: 0.1
logger:
  # NOTE(review): ERROR presumably keeps log output minimal during the
  # measurement — confirm this was intentional.
  level: ERROR

# Metrics are enabled on port 8000.
# NOTE(review): presumably scraped by the Prometheus container started in the
# setup step — confirm.
metrics:
  enabled: true
  port: 8000

# Processor pipeline. Each list entry maps an instance name to a processor
# `type` plus the specific/generic rule directories it loads from the
# quickstart example data.
# NOTE(review): presumably events pass through the processors in list order —
# confirm against the Logprep documentation.
pipeline:
- dissector:
    type: dissector
    specific_rules:
    - quickstart/exampledata/rules/030_dissector/rules_specific/
    generic_rules:
    - quickstart/exampledata/rules/030_dissector/rules_generic/

- grokker:
    type: grokker
    specific_rules:
    - quickstart/exampledata/rules/035_grokker/rules_specific/
    generic_rules:
    - quickstart/exampledata/rules/035_grokker/rules_generic/

# The instance name carries an `_a` suffix because the `field_manager` type is
# configured a second time further down as `field_manager_b`.
- field_manager_a:
    type: field_manager
    generic_rules:
    - quickstart/exampledata/rules/041_field_manager/generic_rules
    specific_rules:
    - quickstart/exampledata/rules/041_field_manager/specific_rules

- string_splitter:
    type: string_splitter
    specific_rules:
    - quickstart/exampledata/rules/042_string_splitter/specific_rules/
    generic_rules:
    - quickstart/exampledata/rules/042_string_splitter/generic_rules/

# Timestamper processor; loads its rules from the 043_timestamper directories.
- timestamper:
    type: timestamper
    specific_rules:
    - quickstart/exampledata/rules/043_timestamper/rules_specific/
    generic_rules:
    - quickstart/exampledata/rules/043_timestamper/rules_generic/

- calculator:
    type: calculator
    specific_rules:
    - quickstart/exampledata/rules/045_calculator/rules_specific/
    generic_rules:
    - quickstart/exampledata/rules/045_calculator/rules_generic/

- timestamp_differ:
    type: timestamp_differ
    specific_rules:
    - quickstart/exampledata/rules/050_timestamp_differ/specific_rules/
    generic_rules:
    - quickstart/exampledata/rules/050_timestamp_differ/generic_rules/

# Instance `labelername` of type `labeler`; besides the rule directories it
# is given a label schema file.
- labelername:
    type: labeler
    schema: quickstart/exampledata/rules/060_labeler/schema/schema.json
    # NOTE(review): presumably also applies the parent labels defined in the
    # schema — confirm.
    include_parent_labels: true
    generic_rules:
    - quickstart/exampledata/rules/060_labeler/generic_rules/
    specific_rules:
    - quickstart/exampledata/rules/060_labeler/specific_rules/

# Both domain processors read the same public suffix list and tree config.
- domain_resolver:
    type: domain_resolver
    specific_rules:
    - quickstart/exampledata/rules/070_domain_resolver/specific_rules/
    generic_rules:
    - quickstart/exampledata/rules/070_domain_resolver/generic_rules/
    tree_config: quickstart/exampledata/artifacts/tree_config.json
    tld_lists: ["quickstart/exampledata/lists/public_suffix_list.dat"]
    # NOTE(review): unit is presumably seconds — confirm.
    timeout: 10.0
    # NOTE(review): hard-coded example salt; only suitable for this local
    # quickstart/benchmark setup.
    hash_salt: "thisisasecureandrandomkey"
    max_caching_days: 1
    max_cached_domains: 20000

- domain_label_extractor:
    type: domain_label_extractor
    tld_lists: ["quickstart/exampledata/lists/public_suffix_list.dat"]
    specific_rules:
    - quickstart/exampledata/rules/080_domain_label_extractor/specific_rules/
    generic_rules:
    - quickstart/exampledata/rules/080_domain_label_extractor/generic_rules/
    tree_config: quickstart/exampledata/artifacts/tree_config.json

# All entries in this group except `build_indexname` reference the shared
# tree_config.json artifact.
- datetime_extractor:
    type: datetime_extractor
    generic_rules:
    - quickstart/exampledata/rules/100_datetime_extractor/generic_rules/
    specific_rules:
    - quickstart/exampledata/rules/100_datetime_extractor/specific_rules/
    tree_config: quickstart/exampledata/artifacts/tree_config.json

- generic_adder:
    type: generic_adder
    generic_rules:
    - quickstart/exampledata/rules/110_generic_adder/generic_rules
    specific_rules:
    - quickstart/exampledata/rules/110_generic_adder/specific_rules
    tree_config: quickstart/exampledata/artifacts/tree_config.json

# Instance `build_indexname` is a processor of type `concatenator`.
- build_indexname:
    type: concatenator
    specific_rules:
    - quickstart/exampledata/rules/115_concatenator/specific_rules/
    generic_rules:
    - quickstart/exampledata/rules/115_concatenator/generic_rules/

- generic_resolver:
    type: generic_resolver
    generic_rules:
    - quickstart/exampledata/rules/120_generic_resolver/generic_rules/
    specific_rules:
    - quickstart/exampledata/rules/120_generic_resolver/specific_rules/
    tree_config: quickstart/exampledata/artifacts/tree_config.json

# Template replacer with a template file and a pattern built from three
# winlog fields.
- template_replacer:
    type: template_replacer
    generic_rules:
    - quickstart/exampledata/rules/130_template_replacer/generic_rules/
    specific_rules:
    - quickstart/exampledata/rules/130_template_replacer/specific_rules/
    template: quickstart/exampledata/rules/130_template_replacer/templates.yml
    # NOTE(review): presumably the listed field values are joined with the
    # delimiter to look up a template whose text is written to `target_field`
    # — confirm against the Logprep documentation.
    pattern:
      delimiter: "-"
      fields:
      - winlog.channel
      - winlog.provider_name
      - winlog.event_id
      # NOTE(review): presumably the only field whose value may itself
      # contain the delimiter — confirm.
      allowed_delimiter_field: winlog.provider_name
      target_field: message
    tree_config: quickstart/exampledata/artifacts/tree_config.json

- list_comparison:
    type: list_comparison
    generic_rules:
    - quickstart/exampledata/rules/140_list_comparison/generic_rules/
    specific_rules:
    - quickstart/exampledata/rules/140_list_comparison/specific_rules/
    tree_config: quickstart/exampledata/artifacts/tree_config.json
    # Unlike the other paths in this file, this one is written with a
    # leading `./`.
    list_search_base_path: ./quickstart/exampledata/lists

# AMIDES processor; in addition to its rules it loads a model archive.
- amides:
    type: amides
    generic_rules:
    - quickstart/exampledata/rules/145_amides/generic_rules/
    specific_rules:
    - quickstart/exampledata/rules/145_amides/specific_rules/
    tree_config: quickstart/exampledata/artifacts/tree_config.json
    models_path: quickstart/exampledata/models/model.zip
    num_rule_attributions: 10
    max_cache_entries: 1000000
    # NOTE(review): presumably the classifier score above which an event is
    # flagged — confirm.
    decision_threshold: 0.32

# Both processors below declare extra `outputs` that refer to the
# `opensearch` output defined in the `output` section of this file.
# NOTE(review): the values (`sre`, `pseudonyms`) are presumably the target
# index names — confirm.
- pre_detector:
    type: pre_detector
    generic_rules:
    - quickstart/exampledata/rules/150_pre_detector/generic_rules/
    specific_rules:
    - quickstart/exampledata/rules/150_pre_detector/specific_rules/
    outputs:
    - opensearch: sre
    tree_config: quickstart/exampledata/artifacts/tree_config.json

- pseudonymizer:
    type: pseudonymizer
    pubkey_analyst: quickstart/exampledata/rules/160_pseudonymizer/example_analyst_pub.pem
    pubkey_depseudo: quickstart/exampledata/rules/160_pseudonymizer/example_depseudo_pub.pem
    regex_mapping: quickstart/exampledata/artifacts/regex_mapping.yml
    # NOTE(review): hard-coded example salt; only suitable for this local
    # quickstart/benchmark setup.
    hash_salt: "thisisasecureandrandomkey"
    outputs:
    - opensearch: pseudonyms
    specific_rules:
    - quickstart/exampledata/rules/160_pseudonymizer/specific_rules/
    generic_rules:
    - quickstart/exampledata/rules/160_pseudonymizer/generic_rules/
    max_cached_pseudonyms: 1000000

# Second instance of the `field_manager` type (the first one is
# `field_manager_a`); it loads a different rule set (165_field_manager).
- field_manager_b:
    type: field_manager
    generic_rules:
    - quickstart/exampledata/rules/165_field_manager/generic_rules
    specific_rules:
    - quickstart/exampledata/rules/165_field_manager/specific_rules

- selective_extractor:
    type: selective_extractor
    specific_rules:
    - quickstart/exampledata/rules/170_selective_extractor/specific_rules/
    generic_rules:
    - quickstart/exampledata/rules/170_selective_extractor/generic_rules/

# Last processor in the pipeline.
- dropper:
    type: dropper
    specific_rules:
    - quickstart/exampledata/rules/180_dropper/specific_rules/
    generic_rules:
    - quickstart/exampledata/rules/180_dropper/generic_rules/

# Event source: a Kafka input reading topic `consumer` from the local broker.
input:
  kafka:
    type: confluentkafka_input
    topic: consumer
    kafka_config:
      bootstrap.servers: 127.0.0.1:9092
      group.id: cgroup3
      # These two values are quoted, so they are passed on as the strings
      # "false" rather than YAML booleans.
      # NOTE(review): presumably required by the Kafka client configuration
      # — confirm before unquoting.
      enable.auto.commit: "false"
      enable.auto.offset.store: "false"
    preprocessing:
      version_info_target_field: Logprep_version_info
      log_arrival_time_target_field: event.ingested
      hmac:
        target: <RAW_MSG>
        # NOTE(review): hard-coded example key; only suitable for this local
        # quickstart/benchmark setup.
        key: "thisisasecureandrandomkey"
        output_field: Full_event

# Event sinks: a local OpenSearch instance and a local Kafka broker.
output:
  # Referenced by name from the `outputs` entries of the pre_detector and
  # pseudonymizer processors.
  opensearch:
    type: opensearch_output
    hosts:
    - 127.0.0.1:9200
    default_index: processed
    error_index: errors
    message_backlog_size: 10000
    timeout: 10000
    flush_timeout: 600
    max_retries: 3
    # NOTE(review): example credentials, presumably the defaults of the
    # quickstart OpenSearch container — never reuse outside a local test setup.
    user: admin
    secret: admin
  kafka:
    type: confluentkafka_output
    # NOTE(review): `default: false` presumably means events are not written
    # here unless a processor targets this output explicitly — confirm.
    default: false
    topic: producer
    error_topic: errors
    flush_timeout: 300
    kafka_config:
      bootstrap.servers: 127.0.0.1:9092

procedure

  • wait for zero line in grafana
  • start logprep in venv from commandline
  • wait for 7 minutes
  • get comparable time frame (incoming events, commit success)

python 3.12

first run

image

second run

image

third run

image

python 3.11

first run

image

second run

image

third run

image

python 3.10 validation

Because the values are pretty close, I did a validation with the oldest supported Python version to ensure the measurement method is valid. And we all know that Python 3.11 should be significantly faster than 3.10.

image

Conclusion

  1. The measurement method seems to be valid
  2. Python 3.11 is still the fastest option to run logprep
  3. All this could change if we use newer Python features, which become available with the upcoming release 9.0.0, where we will drop support for Python 3.9