# ext_conf_template.txt (forked from tomasnorre/crawler)
#########
## Settings
## General crawler behaviour: frontend base path, hidden pages and request mode.
#########
# cat=Settings; type=string; label=Frontend website base path: Base path of the website frontend (e.g. if you call http://mydomain.com/cms/index.php in the browser the base path is "/cms/"). Leave empty to use the value of config.absRefPrefix instead.
frontendBasePath=/
# cat=Settings; type=boolean; label= Crawl hidden pages: Crawl hidden pages (By default they won't be crawled)
crawlHiddenPages=0
# cat=Settings; type=boolean; label= Make direct requests: If checked the crawler will make direct requests by including the index.php file instead of getting the page content via http(s)
makeDirectRequests=0
#########
## Queue
## Limits and duplicate handling when URLs are added to the crawler queue.
## NOTE(review): the 86400 upper bound on maxCompileUrls is the same range used for
## processMaxRunTime (seconds) - confirm it is intended for a URL count.
#########
# cat=Queue; type=int [1- 86400]; label= Maximal number of URLs, which can be added to the queue at one time
maxCompileUrls=10000
# cat=Queue; type=boolean; label= Enabled timeslot for duplication check: When this option is active, items will not be queued twice for the past if their scheduled time is the current time +-100 seconds.
enableTimeslot=1
#########
## Processing
## Pacing and limits for a crawler run: sleep times, entries per run and parallel processes.
#########
# cat=Processing; type=int [0-10000]; label= Sleep time between requests: Time in microseconds the crawler should sleep between requesting urls: low = faster / high = less stress for the server
sleepTime=1000
# cat=Processing; type=int [0-100]; label= Sleep time after finishing: Time in seconds the crawler should sleep before finishing
sleepAfterFinish=10
# cat=Processing; type=int [1-10000]; label= Entries per run: How many queue entries should be processed in a run
countInARun=100
# cat=Processing; type=int [1-99]; label= Maximum processes
processLimit=1
# cat=Processing; type=int [1-86400]; label= Maximal process runtime: in seconds - only necessary if processLimit > 1
processMaxRunTime=300
#########
## Cleanup
## Retention rules for queue entries. All ages are given in days.
## The purgeQueueDays range starts at 0 because its label documents 0 as the keep-forever value.
#########
# cat=Cleanup; type=boolean; label=Clean up old queue entries: If checked the older queue entries will be deleted when adding new crawler configurations from CLI.
cleanUpOldQueueEntries=1
# cat=Cleanup; type=int [1-99]; label=Processed Age: If Clean up old queue entries is checked, then processed entries older than X days are deleted.
cleanUpProcessedAge=2
# cat=Cleanup; type=int [1-99]; label=Scheduled Age: If Clean up old queue entries is checked, then scheduled entries older than X days are deleted.
cleanUpScheduledAge=7
# cat=Cleanup; type=int [0-365]; label= Delete processed items: Delete processed items from the queue after n days (0 will keep the entries forever - the database may grow very large over time!)
purgeQueueDays=14
#########
## System
## Which PHP command-line binary the crawler uses, by name and optionally by explicit path.
#########
# cat=System; type=string; label= Name of the php binary (e.g. PHP72-LATEST-CLI ), default is php
phpBinary=php
# cat=System; type=string; label= PHP Path: Local path to php binary file (e.g. "/usr/bin/php"), you should ONLY use this when the resolved php-binary isn't the correct one. You can check that in the Info -> Site Crawling -> Crawling Process -> CLI-Path
phpPath=
#########
## Debug
## Diagnostic output switches for multi-process crawling. Both are off by default.
#########
# cat=Debug; type=boolean; label= Debug: Print multi-process processing information - prints some information about whether a process was really executed and which status it has
processDebug=0
# cat=Debug; type=boolean; label= Make multi-process processing verbose while running
processVerbose=0