-
Notifications
You must be signed in to change notification settings - Fork 138
/
config.yml.example
260 lines (260 loc) · 8.37 KB
/
config.yml.example
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
## ================= Elastic Connectors Configuration ==================
#
## NOTE: Elastic Connectors comes with reasonable defaults.
## Before adjusting the configuration, make sure you understand what you
## are trying to accomplish and the consequences.
#
#
## ------------------------------- Connectors -------------------------------
#
## The list of configurations for self-managed/customized connectors.
## Each object in the list requires `connector_id` and `service_type`.
## An example is:
## connectors:
##   - connector_id: changeme # The ID of the connector.
##     service_type: changeme # The service type of the connector.
##     api_key: changeme # The Elasticsearch API key used to write data into the content index.
#connectors: []
#
#
## The ID of the connector.
## (Deprecated. Configure the self-managed connectors in an object in the `connectors` list)
#connector_id: null
#
#
## The service type of the connector.
## (Deprecated. Configure the self-managed connectors in an object in the `connectors` list)
#service_type: null
#
#
## ------------------------------- Elasticsearch -------------------------------
#
## The host of the Elasticsearch deployment.
#elasticsearch.host: http://localhost:9200
#
#
## The API key for Elasticsearch connection.
## Using `api_key` is recommended instead of `username`/`password`.
#elasticsearch.api_key: null
#
#
## The username for the Elasticsearch connection.
## Using `username` requires `password` to also be configured.
## However, `elasticsearch.api_key` is the recommended configuration choice.
#elasticsearch.username: elastic
#
#
## The password for the Elasticsearch connection.
## Using `password` requires `username` to also be configured.
## However, `elasticsearch.api_key` is the recommended configuration choice.
#elasticsearch.password: changeme
#
#
## Whether SSL is used for the Elasticsearch connection.
#elasticsearch.ssl: true
#
#
## Whether to verify server certificates using a CA bundle.
## Disabling this option (setting it to `false`) should be avoided in production.
#elasticsearch.verify_certs: true
#
#
## Path to a CA bundle, e.g. /path/to/ca.crt
#elasticsearch.ca_certs: null
#
#
## Whether to retry on request timeout.
#elasticsearch.retry_on_timeout: true
#
#
## The request timeout (in seconds) passed to the Elasticsearch client transport options.
#elasticsearch.request_timeout: 120
#
#
## The maximum wait duration (in seconds) for the Elasticsearch connection.
#elasticsearch.max_wait_duration: 60
#
#
## The initial backoff duration (in seconds).
#elasticsearch.initial_backoff_duration: 1
#
#
## The backoff multiplier.
#elasticsearch.backoff_multiplier: 2
#
#
## Elasticsearch log level
#elasticsearch.log_level: INFO
#
#
## Maximum number of times failed Elasticsearch requests are retried, except bulk requests
#elasticsearch.max_retries: 5
#
#
## Retry interval between failed Elasticsearch requests, except bulk requests
#elasticsearch.retry_interval: 10
#
#
## ------------------------------- Elasticsearch: Bulk ------------------------
#
## Options for the Bulk API calls behavior - all options can be
## overridden by each source class
#
#
## The number of documents processed between each display of the counters.
#elasticsearch.bulk.display_every: 100
#
#
## The max size of the bulk queue
#elasticsearch.bulk.queue_max_size: 1024
#
#
## The max size in MB of the bulk queue.
## When it's reached, the next put operation waits for the queue size to
## get under that limit.
#elasticsearch.bulk.queue_max_mem_size: 25
#
#
## Minimal interval of time between MemQueue checks for being full
#elasticsearch.bulk.queue_refresh_interval: 1
#
#
## Maximal interval of time (in seconds) during which MemQueue does not dequeue a single document.
## For example, if no documents were sent to Elasticsearch within 60 seconds because of
## Elasticsearch being overloaded, then an error will be raised.
## This mechanism exists to be a circuit-breaker for stuck jobs and stuck Elasticsearch.
#elasticsearch.bulk.queue_refresh_timeout: 60
#
#
## The max size in MB of a bulk request.
## When the next request being prepared reaches that size, the query is
## emitted even if `chunk_size` is not yet reached.
#elasticsearch.bulk.chunk_max_mem_size: 5
#
#
## The max size of the bulk operation to Elasticsearch.
#elasticsearch.bulk.chunk_size: 500
#
#
## Maximum number of concurrent bulk requests.
#elasticsearch.bulk.max_concurrency: 5
#
#
## Maximum number of concurrent downloads in the backend.
#elasticsearch.bulk.concurrent_downloads: 10
#
#
## Maximum number of times failed bulk requests are retried
#elasticsearch.bulk.max_retries: 5
#
#
## Retry interval between failed bulk attempts
#elasticsearch.bulk.retry_interval: 10
#
#
## Enable to log ids of created/indexed/deleted/updated documents during a sync.
## This will be logged on 'DEBUG' log level.
## Note: this depends on `service.log_level`, not `elasticsearch.log_level`.
#elasticsearch.bulk.enable_operations_logging: false
#
## ------------------------------- Elasticsearch: Experimental ------------------------
#
## Experimental configuration options for Elasticsearch interactions.
#
#
## Enable usage of the Connectors API instead of calling connectors indices directly.
## Using direct index access is deprecated and will be disallowed entirely in a future version.
#elasticsearch.feature_use_connectors_api: true
## ------------------------------- Service ----------------------------------
#
## Connector service/framework related configurations
#
#
## The interval (in seconds) to poll connectors from Elasticsearch.
#service.idling: 30
#
#
## The interval (in seconds) to send a new heartbeat for a connector.
#service.heartbeat: 300
#
#
## The maximum number of attempts for the pre-flight check.
#service.preflight_max_attempts: 10
#
#
## The number of seconds to wait between each pre-flight check.
#service.preflight_idle: 30
#
#
## The maximum number of errors allowed in one event loop.
#service.max_errors: 20
#
#
## The number of seconds after which the `max_errors` count is reset.
#service.max_errors_span: 600
#
#
## The maximum number of concurrent content syncs.
#service.max_concurrent_content_syncs: 1
#
#
## The maximum number of concurrent access control syncs.
#service.max_concurrent_access_control_syncs: 1
#
#
## The maximum size (in bytes) of files that the framework should be willing
## to download and/or process.
#service.max_file_download_size: 10485760
#
## The interval (in seconds) at which the job cleanup task runs.
#service.job_cleanup_interval: 300
#
#
## Connector service log level.
#service.log_level: INFO
#
#
## ------------------------------- Extraction Service ----------------------------------
#
## Local extraction service-related configurations.
## These configurations are optional and are not included by default.
## The presence of these configurations enables local content extraction.
## By default, this whole object is `null`.
## See: https://www.elastic.co/guide/en/elasticsearch/reference/current/es-connectors-content-extraction.html#connectors-content-extraction-local
#
#
## The host of the local extraction service.
#extraction_service.host: null
#
#
## Request timeout for local extraction service requests, in seconds.
#extraction_service.timeout: 30
#
#
## Whether or not to use file pointers for local extraction.
## This can have very positive impacts on performance -
## both speed and memory consumption.
## However, it also requires that the Connectors deployment and the
## local extraction service deployment must share a filesystem.
#extraction_service.use_file_pointers: false
#
#
## The size (in bytes) that files are chunked to for streaming when sending
## a file to the local extraction service.
## Only applicable if `extraction_service.use_file_pointers` is `false`.
#extraction_service.stream_chunk_size: 65536
#
#
## The location for files to be extracted from.
## Only applicable if `extraction_service.use_file_pointers` is `true`.
#extraction_service.shared_volume_dir: /app/files
#
#
## ------------------------------- Sources ----------------------------------
#
## An object mapping service type names to class Fully Qualified Names,
## e.g. `connectors.sources.mongo:MongoDataSource`.
## If adding a net-new connector, it must be added here for the framework to detect it.
## Default includes all tech preview, beta, and GA connectors in this repository.
## An example is:
## sources:
##   mongodb: connectors.sources.mongo:MongoDataSource