## config.yml.example

## ================= Elastic Connectors Configuration ==================
#
## NOTE: Elastic Connectors comes with reasonable defaults.
##       Before adjusting the configuration, make sure you understand what you
##       are trying to accomplish and the consequences.
#
#
## ------------------------------- Connectors -------------------------------
#
##  The list of self-managed/customized connector configurations.
##    Each object in the list requires `connector_id` and `service_type`.
##    An example is:
##    connectors:
##      - connector_id: changeme # The ID of the connector.
##        service_type: changeme # The service type of the connector.
##        api_key: changeme # The Elasticsearch API key used to write data into the content index.
#connectors: []
#
#
##  The ID of the connector.
##    (Deprecated. Configure the self-managed connectors in an object in the `connectors` list)
#connector_id: null
#
#
##  The service type of the connector.
##    (Deprecated. Configure the self-managed connectors in an object in the `connectors` list)
#service_type: null
#
#
## ------------------------------- Elasticsearch -------------------------------
#
## The host of the Elasticsearch deployment.
#elasticsearch.host: http://localhost:9200
#
#
## The API key for Elasticsearch connection.
##    Using `api_key` is recommended instead of `username`/`password`.
#elasticsearch.api_key: null
#
#
##  The username for the Elasticsearch connection.
##    Using `username` requires `password` to also be configured.
##    However, `elasticsearch.api_key` is the recommended configuration choice.
#elasticsearch.username: elastic
#
#
##  The password for the Elasticsearch connection.
##    Using `password` requires `username` to also be configured.
##    However, `elasticsearch.api_key` is the recommended configuration choice.
#elasticsearch.password: changeme
#
#
##  Whether SSL is used for the Elasticsearch connection.
#elasticsearch.ssl: true
#
#
##  Whether to perform verification checks for server certificates using the CA bundle.
##    Disabling this option should be avoided in production.
#elasticsearch.verify_certs: true
#
#
##  Path to a CA bundle, e.g. /path/to/ca.crt
#elasticsearch.ca_certs: null
#
#
##  Whether to retry on request timeout.
#elasticsearch.retry_on_timeout: true
#
#
##  The request timeout to be passed to transport in options.
#elasticsearch.request_timeout: 120
#
#
##  The maximum wait duration (in seconds) for the Elasticsearch connection.
#elasticsearch.max_wait_duration: 60
#
#
##  The initial backoff duration (in seconds).
#elasticsearch.initial_backoff_duration: 1
#
#
##  The backoff multiplier.
#elasticsearch.backoff_multiplier: 2
#
#
##  Elasticsearch log level
#elasticsearch.log_level: INFO
#
#
##  Maximum number of times failed Elasticsearch requests are retried, except bulk requests
#elasticsearch.max_retries: 5
#
#
##  Retry interval between failed Elasticsearch requests, except bulk requests
#elasticsearch.retry_interval: 10
#
#
## ------------------------------- Elasticsearch: Bulk ------------------------
#
##  Options for the Bulk API calls behavior - all options can be
##    overridden by each source class
#
#
##  The number of docs processed between each display of the counters.
#elasticsearch.bulk.display_every: 100
#
#
##  The max size of the bulk queue
#elasticsearch.bulk.queue_max_size: 1024
#
#
##  The max size in MB of the bulk queue.
##    When it's reached, the next put operation waits for the queue size to
##    get under that limit.
#elasticsearch.bulk.queue_max_mem_size: 25
#
#
##  Minimal interval of time between MemQueue checks for being full
#elasticsearch.bulk.queue_refresh_interval: 1
#
#
##  Maximum interval of time during which MemQueue may go without dequeuing a single document.
##    For example, if no documents were sent to Elasticsearch within 60 seconds because
##    Elasticsearch is overloaded, then an error will be raised.
##    This mechanism acts as a circuit breaker for stuck jobs and a stuck Elasticsearch.
#elasticsearch.bulk.queue_refresh_timeout: 60
#
#
##  The max size in MB of a bulk request.
##    When the next request being prepared reaches that size, the query is
##    emitted even if `chunk_size` is not yet reached.
#elasticsearch.bulk.chunk_max_mem_size: 5
#
#
##  The max size of the bulk operation to Elasticsearch.
#elasticsearch.bulk.chunk_size: 500
#
#
##  Maximum number of concurrent bulk requests.
#elasticsearch.bulk.max_concurrency: 5
#
#
##  Maximum number of concurrent downloads in the backend.
#elasticsearch.bulk.concurrent_downloads: 10
#
#
##  Maximum number of times failed bulk requests are retried
#elasticsearch.bulk.max_retries: 5
#
#
##  Retry interval between failed bulk attempts
#elasticsearch.bulk.retry_interval: 10
#
#
##  Enable to log ids of created/indexed/deleted/updated documents during a sync.
##    This will be logged on 'DEBUG' log level. Note: this depends on the service.log_level, not elasticsearch.log_level
#elasticsearch.bulk.enable_operations_logging: false
#
## ------------------------------- Elasticsearch: Experimental ------------------------
#
##  Experimental configuration options for Elasticsearch interactions.
#
#
##  Enable usage of Connectors API instead of calling connectors indices directly
##  Using direct index access is deprecated, and will be disallowed entirely in a future version
#elasticsearch.feature_use_connectors_api: true
#
#
## ------------------------------- Service ----------------------------------
#
##  Connector service/framework related configurations
#
#
##  The interval (in seconds) to poll connectors from Elasticsearch.
#service.idling: 30
#
#
##  The interval (in seconds) to send a new heartbeat for a connector.
#service.heartbeat: 300
#
#
##  The maximum number of retries for the pre-flight check.
#service.preflight_max_attempts: 10
#
#
##  The number of seconds to wait between each pre-flight check.
#service.preflight_idle: 30
#
#
##  The maximum number of errors allowed in one event loop.
#service.max_errors: 20
#
#
##  The number of seconds after which the `max_errors` count is reset.
#service.max_errors_span: 600
#
#
##  The maximum number of concurrent content syncs.
#service.max_concurrent_content_syncs: 1
#
#
##  The maximum number of concurrent access control syncs.
#service.max_concurrent_access_control_syncs: 1
#
#
##  The maximum size (in bytes) of files that the framework should be willing
##    to download and/or process.
#service.max_file_download_size: 10485760
#
##  The interval (in seconds) at which to run the job cleanup task.
#service.job_cleanup_interval: 300
#
#
##  Connector service log level.
#service.log_level: INFO
#
#
## ------------------------------- Extraction Service ----------------------------------
#
##  Local extraction service-related configurations.
##    These configurations are optional and are not included by default.
##    The presence of these configurations enables local content extraction.
##    By default, this whole object is `null`.
##    See: https://www.elastic.co/guide/en/elasticsearch/reference/current/es-connectors-content-extraction.html#connectors-content-extraction-local
#
#
##  The host of the local extraction service.
#extraction_service.host: null
#
#
##  Request timeout for local extraction service requests, in seconds.
#extraction_service.timeout: 30
#
#
##  Whether or not to use file pointers for local extraction.
##    This can have very positive impacts on performance -
##    both speed and memory consumption.
##    However, it also requires that the Connectors deployment and the
##    local extraction service deployment share a filesystem.
#extraction_service.use_file_pointers: false
#
#
##  The size (in bytes) that files are chunked to for streaming when sending
##    a file to the local extraction service.
##    Only applicable if `extraction_service.use_file_pointers` is `false`.
#extraction_service.stream_chunk_size: 65536
#
#
##  The location for files to be extracted from.
##    Only applicable if `extraction_service.use_file_pointers` is `true`.
#extraction_service.shared_volume_dir: /app/files
#
#
## ------------------------------- Sources ----------------------------------
#
##  An object mapping service type names to fully qualified class names.
##    E.g. `connectors.sources.mongo:MongoDataSource`.
##    If adding a net-new connector, it must be added here for the framework to detect it.
##    Default includes all tech preview, beta, and GA connectors in this repository.
##    An example is:
##    sources:
##      mongodb: connectors.sources.mongo:MongoDataSource