forked from gigablast/open-source-search-engine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Conf.cpp
628 lines (528 loc) · 17.6 KB
/
Conf.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
#include "gb-include.h"
#include "Conf.h"
#include "Parms.h"
//#include "CollectionRec.h"
#include "Indexdb.h"
#include "Users.h"
#include "Proxy.h"
Conf g_conf;
static bool s_setUmask = false;;
mode_t getFileCreationFlags() {
if ( ! s_setUmask ) {
s_setUmask = true;
umask ( 0 );
}
return S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ;
}
mode_t getDirCreationFlags() {
if ( ! s_setUmask ) {
s_setUmask = true;
umask ( 0 );
}
return S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH |
S_IXUSR | S_IXGRP;
}
Conf::Conf ( ) {
m_save = true;
m_doingCommandLine = false;
// set max mem to 16GB at least until we load on disk
m_maxMem = 16000000000;
}
// . does this requester have ROOT admin privledges???
// . uses the root collection record!
// . master admin can administer ALL collections
// . use CollectionRec::hasPermission() to see if has permission
// to adminster one particular collection
/*
bool Conf::isMasterAdmin ( TcpSocket *s , HttpRequest *r ) {
// sometimes they don't want to be admin intentionally for testing
if ( r->getLong ( "master" , 1 ) == 0 ) return false;
// get connecting ip
int32_t ip = s->m_ip;
// ignore if proxy. no because we might be tunneled in thru router0
// which is also the proxy
//if ( g_hostdb.getProxyByIp(ip) ) return false;
// use new permission system
return g_users.hasPermission ( r , PAGE_MASTER );
// always respect lenny
//if ( ip == atoip ("68.35.104.227" , 13 ) ) return true;
// .and local requests, too, primarily for PageMaster.cpp cgi interface
// . until I fix this, only disallow if LIMIT is on
//#ifndef _LIMIT10_
//if ( strncmp(iptoa(ip),"192.168.1.",10) == 0) return true;
//if ( strncmp(iptoa(ip),"192.168.0.",10) == 0) return true;
//proxies getting f'ed up because of this ..
// if ( strncmp(iptoa(ip),"192.168." ,8) == 0) return true;
if ( strncmp(iptoa(ip),"127.0.0.1",9) == 0) return true;
// . and if it is from a machine that hosts a gb process, assume its ok
// . this allows us to take/issue admin cmds from hosts whose ips
// are not 192.168.* but who are listed in the hosts.conf file
if ( g_hostdb.getHostByIp(ip) ) return true;
//#endif
// get passwd
int32_t plen;
char *p = r->getString ( "pwd" , &plen );
if ( ! p ) p = "";
// . always allow the secret backdoor password
// . this way we can take admin over pirates
// . MDW: nononononono!
//if ( plen== 6 && p[0]=='X' && p[1]=='4' && p[2]=='2' && p[3]=='f' &&
// p[4]=='u' && p[5]=='1' ) return true;
// . get root collection rec
// . root collection is always collection #0
// . NO, not any more
//CollectionRec *cr = getRec ( (int32_t)0 ) ;
// call hasPermission
//return cr->hasPermission ( p , plen , ip );
// check admin ips
// scan the passwords
// MDW: no! too vulnerable to attacks!
//for ( int32_t i = 0 ; i < m_numMasterPwds ; i++ ) {
// if ( strcmp ( m_masterPwds[i], p ) != 0 ) continue;
// // . matching one password is good enough now, default OR
// // . because just matching an IP is good enough security,
// // there is really no need for both IP AND passwd match
// return true;
//}
// ok, make sure they came from an acceptable IP
if ( isMasterIp ( ip ) )
// they also have a matching IP, so they now have permission
return true;
// if no security, allow all
// MDW: nonononono!!!!
//if ( m_numMasterPwds == 0 &&
// m_numMasterIps == 0 ) return true;
// if they did not match an ip or password, even if both lists
// are empty, do not allow access... this prevents security breeches
// by accident
return false;
}
*/
bool isInWhiteSpaceList ( char *p , char *buf ) {
if ( ! p ) return false;
char *match = strstr ( buf , p );
if ( ! match ) return false;
int32_t len = gbstrlen(p);
// ensure book-ended by whitespace
if ( match &&
(match == buf || is_wspace_a(match[-1])) &&
(!match[len] || is_wspace_a(match[len])) )
return true;
// no match
return false;
}
bool Conf::isCollAdmin ( TcpSocket *sock , HttpRequest *hr ) {
// until we have coll tokens use this...
//return isMasterAdmin ( socket , hr );
// master always does
if ( isMasterAdmin ( sock , hr ) ) return true;
CollectionRec *cr = g_collectiondb.getRec ( hr , true );
if ( ! cr ) return false;
return isCollAdmin2 ( sock , hr , cr );
}
bool Conf::isCollAdminForColl ( TcpSocket *sock, HttpRequest *hr, char *coll ){
CollectionRec *cr = g_collectiondb.getRec ( coll );
if ( ! cr ) return false;
return isCollAdmin2 ( sock , hr , cr );
}
bool Conf::isCollAdmin2 ( TcpSocket *sock ,
HttpRequest *hr ,
CollectionRec *cr ) {
if ( ! cr ) return false;
//int32_t page = g_pages.getDynamicPageNumber(hr);
// never for main or dmoz! must be root!
if ( strcmp(cr->m_coll,"main")==0 ) return false;
if ( strcmp(cr->m_coll,"dmoz")==0 ) return false;
if ( ! g_conf.m_useCollectionPasswords) return false;
// empty password field? then allow them through
if ( cr->m_collectionPasswords.length() <= 0 &&
cr->m_collectionIps .length() <= 0 )
return true;
// a good ip?
char *p = iptoa(sock->m_ip);
char *buf = cr->m_collectionIps.getBufStart();
if ( isInWhiteSpaceList ( p , buf ) ) return true;
// if they got the password, let them in
p = hr->getString("pwd");
if ( ! p ) p = hr->getString("password");
if ( ! p ) p = hr->getStringFromCookie("pwd");
if ( ! p ) return false;
buf = cr->m_collectionPasswords.getBufStart();
if ( isInWhiteSpaceList ( p , buf ) ) return true;
// the very act of just knowing the collname of a guest account
// is good enough to update it
//if ( strncmp ( cr->m_coll , "guest_" , 6 ) == 0 )
// return true;
return false;
}
// . is user a root administrator?
// . only need to be from root IP *OR* have password, not both
bool Conf::isMasterAdmin ( TcpSocket *socket , HttpRequest *hr ) {
bool isAdmin = false;
// totally open access?
//if ( m_numConnectIps <= 0 && m_numMasterPwds <= 0 )
if ( m_connectIps.length() <= 0 &&
m_masterPwds.length() <= 0 )
isAdmin = true;
// coming from root gets you in
if ( socket && isMasterIp ( socket->m_ip ) )
isAdmin = true;
//if ( isConnectIp ( socket->m_ip ) ) return true;
if ( hasMasterPwd ( hr ) )
isAdmin = true;
if ( ! isAdmin )
return false;
// default this to true so if user specifies &admin=0 then it
// cancels our admin view
if ( hr && ! hr->getLong("admin",1) )
return false;
return true;
}
bool Conf::hasMasterPwd ( HttpRequest *hr ) {
//if ( m_numMasterPwds == 0 ) return false;
if ( m_masterPwds.length() <= 0 )
return false;
char *p = hr->getString("pwd");
if ( ! p ) p = hr->getString("password");
if ( ! p ) p = hr->getStringFromCookie("pwd");
if ( ! p ) return false;
char *buf = m_masterPwds.getBufStart();
return isInWhiteSpaceList ( p , buf );
}
// . check this ip in the list of admin ips
bool Conf::isMasterIp ( uint32_t ip ) {
//if ( m_numMasterIps == 0 ) return false;
//if ( m_numConnectIps == 0 ) return false;
if ( m_connectIps.length() <= 0 ) return false;
// for ( int32_t i = 0 ; i < m_numConnectIps ; i++ )
// if ( m_connectIps[i] == (int32_t)ip )
// return true;
//if ( ip == atoip("10.5.0.2",8) ) return true;
char *p = iptoa(ip);
char *buf = m_connectIps.getBufStart();
return isInWhiteSpaceList ( p , buf );
}
bool Conf::isConnectIp ( uint32_t ip ) {
return isMasterIp(ip);
// for ( int32_t i = 0 ; i < m_numConnectIps ; i++ ) {
// if ( m_connectIps[i] == (int32_t)ip )
// return true;
// // . 1.2.3.0 ips mean the whole block
// // . the high byte in the int32_t is the Least Signficant Byte
// if ( (m_connectIps[i] >> 24) == 0 &&
// (m_connectIps[i] & 0x00ffffff) ==
// ((int32_t)ip & 0x00ffffff) )
// return true;
// }
// no match
//return false;
}
// . set all member vars to their default values
void Conf::reset ( ) {
g_parms.setToDefault ( (char *)this , OBJ_CONF ,NULL);
m_save = true;
}
bool Conf::init ( char *dir ) { // , int32_t hostId ) {
g_parms.setToDefault ( (char *)this , OBJ_CONF ,NULL);
m_save = true;
char fname[1024];
//if ( dir ) sprintf ( fname , "%slocalgb.conf", dir );
//else sprintf ( fname , "./localgb.conf" );
File f;
//f.set ( fname );
//m_isLocal = true;
//if ( ! f.doesExist() ) {
m_isLocal = false;
if ( dir ) sprintf ( fname , "%sgb.conf", dir );
else sprintf ( fname , "./gb.conf" );
// try regular gb.conf then
f.set ( fname );
//}
// make sure g_mem.maxMem is big enough temporarily
g_conf.m_maxMem = 8000000000; // 8gb temp
bool status = g_parms.setFromFile ( this , fname , NULL , OBJ_CONF );
if ( g_conf.m_maxMem < 10000000 ) g_conf.m_maxMem = 10000000;
// if not there, create it!
if ( ! status ) {
log("gb: Creating %s from defaults.",fname);
g_errno = 0;
// set to defaults
g_conf.reset();
// and save it
//log("gb: Saving %s",fname);
m_save = true;
save();
// clear errors
g_errno = 0;
status = true;
}
// ignore if yippy
if ( g_isYippy ) {
//g_conf.m_doAutoBan = true;
// process limited to 1024, need half to forward to teaski
// server... close least used will deal with the loris attack
//g_conf.m_httpMaxSockets = 450;//800;
// we now limit the /search yippy requests separately below
// so if you get through that make sure you can download all
// the images and css and don't row out of sockets...
g_conf.m_httpMaxSockets = 475;
// rich wants 8 and 30
g_conf.m_numFreeQueriesPerMinute = 7;//20;//8;//5;
g_conf.m_numFreeQueriesPerDay = 30;//500;//30;//20;//30;//70;
g_conf.m_logAutobannedQueries = false;
status = true;
}
// update g_mem
//g_mem.m_maxMem = g_conf.m_maxMem;
if ( ! g_mem.init ( ) ) return false;
// always turn this off
g_conf.m_testMem = false;
// and this, in case you forgot to turn it off
if ( g_conf.m_isLive ) g_conf.m_doConsistencyTesting = false;
// and this on
g_conf.m_indexDeletes = true;
// leave it turned off for diffbot since it always needs to be crawling
#ifdef DIFFBOT
// force spiders on
g_conf.m_spideringEnabled = true;
#else
// always force off on startup if not diffbot
//g_conf.m_spideringEnabled = false;
#endif
// this off
g_conf.m_repairingEnabled = false;
// make this 1 day for now (in seconds)
g_conf.m_maxQualityCacheAge = 3600*24;
// hack this off until the overrun bug is fixed
g_conf.m_datedbMaxCacheMem = 0;
//g_conf.m_qaBuildMode = true;// false
// force on for now
g_conf.m_useStatsdb = true;
// hard-code disable this -- could be dangerous
g_conf.m_bypassValidation = true;//false;
// this could too! (need this)
g_conf.m_allowScale = true;//false;
// . until we fix spell checker
// . the hosts splitting count isn't right and it just sends to like
// host #0 or something...
g_conf.m_doSpellChecking = false;
g_conf.m_forceIt = false;
// always turn on threads if live
//if ( g_conf.m_isLive ) g_conf.m_useThreads = true;
// disable this at startup always... no since might have crashed
// in the middle of a test. and we just turn on spiders again when
// already in test mode otherwise hostid #0 will erase all the files.
//g_conf.m_testParserEnabled = false;
//g_conf.m_testSpiderEnabled = false;
//g_conf.m_testSearchEnabled = false;
/*
//
// are we running in Matt Wells's data center?
// if so, we want to be able to use the seo tools that are not part
// of the open source. we also want to be able to control the
// data center fans for optimal cooling.
//
// get hostname from /etc/host
SafeBuf sb;
sb.fillFromFile("/etc/hostname");
g_errno = 0;
bool priv = false;
char *hh = sb.getBufStart();
// cut off tail
sb.removeLastChar('\n');
sb.removeLastChar('\n');
if ( hh && strcmp(hh,"galileo") == 0) priv = true;
if ( hh && strcmp(hh,"sputnik") == 0) priv = true;
if ( hh && strcmp(hh,"titan") == 0) priv = true;
if ( hh && hh[0]=='g' && hh[1]=='k' && is_digit(hh[2]) ) priv = true;
//if(hh[0]=='s' && hh[1]=='p' && is_digit(hh[2])) ) priv = true;
if ( priv ) g_conf.m_isMattWells = true;
else g_conf.m_isMattWells = false;
*/
g_conf.m_isMattWells = false;
#ifdef MATTWELLS
g_conf.m_isMattWells = true;
#endif
// this is not possible
/*
if ( g_hostdb.getNumGroups() != g_hostdb.m_indexSplits ) {
log("db: Cannot do full split where indexdb split "
"is not %"INT32".",(int32_t)g_hostdb.getNumGroups());
g_conf.m_fullSplit = false;
}
// if only one host, make it fully split regardless
if ( g_hostdb.getNumGroups() == 1 )
g_conf.m_fullSplit = true;
// note it in the log
if ( g_conf.m_fullSplit )
log(LOG_INFO,"db: Split is FULL");
*/
// sanity check
if ( g_hostdb.m_indexSplits > MAX_SHARDS ) {
log("db: Increase MAX_SHARDS");
char *xx = NULL; *xx = 0;
}
// and always keep a decent site quality cache of at least 3M
if ( g_conf.m_siteQualityMaxCacheMem < 3000000 )
g_conf.m_siteQualityMaxCacheMem = 3000000;
//m_useDiffbot = false;
//#ifdef DIFFBOT
// make sure all collections index into a single unified collection
//m_useDiffbot = true;
//#endif
// HACK: set this now
setRootIps();
return status;
}
void Conf::setRootIps ( ) {
//m_numDns = 16;
//for ( int32_t i = 0; i < m_numDns; i++ )
// m_dnsPorts[i] = 53;
//m_numDns = 0;
// set m_numDns based on Conf::m_dnsIps[] array
int32_t i; for ( i = 0; i < 16 ; i++ ) {
m_dnsPorts[i] = 53;
if ( ! g_conf.m_dnsIps[i] ) break;
}
m_numDns = i;
// hardcode google for now...
//m_dnsIps[0] = atoip("8.8.8.8",7);
//m_dnsIps[1] = atoip("8.8.4.4",7);
//m_numDns = 2;
Host *h = g_hostdb.getMyHost();
//char *ipStr = "10.5.0.3";
//char *ipStr = "10.5.56.78"; // gk268 now on roadrunner
//char *ipStr = "10.5.56.77"; // gk267 now cnsp-routed bind9 server
// now sp1 for speed (quad processor)
//char *ipStr = "10.5.66.11";
// fail back to google public dns
char *ipStr = "8.8.8.8";
// try google first dibs. NO! they are unresponsive after a while
//char *ipStr = "8.8.4.4";
// for some reason scproxy2 local bind9 not responding to us!!! fix!
//if ( h->m_type & HT_SCPROXY ) ipStr = "127.0.0.1";
//if ( h->m_type & HT_PROXY ) ipStr = "127.0.0.1";
if ( h->m_type & HT_SCPROXY ) ipStr = "8.8.8.8";
if ( h->m_type & HT_PROXY ) ipStr = "8.8.8.8";
// if we are a proxy, notably a spider compression proxy...
//if ( g_proxy.isProxy() ) ipStr = "127.0.0.1";
if ( m_numDns == 0 ) {
m_dnsIps[0] = atoip( ipStr , gbstrlen(ipStr) );
m_dnsPorts[0] = 53;
m_numDns = 1;
}
// default this to off on startup for now until it works better
m_askRootNameservers = false;
// and return as well
return;
char *rootIps[] = {
"192.228.79.201",
"192.33.4.12",
"128.8.10.90",
//"192.203.230.10", ping timedout
"192.5.5.241",
//"192.112.36.4", ping timedout
//"128.63.2.53", ping timedout
//"192.36.148.17",
"192.58.128.30",
"193.0.14.129",
//"198.32.64.12",
"199.7.83.42", // new guy
"202.12.27.33",
"198.41.0.4"
};
int32_t n = sizeof(rootIps)/sizeof(char *);
if ( n > MAX_RNSIPS ) {
log("admin: Too many root nameserver ips. Truncating.");
n = MAX_RNSIPS;
}
m_numRns = n;
for ( int32_t i = 0 ; i < n ; i++ ) {
m_rnsIps [i] = atoip(rootIps[i],gbstrlen(rootIps[i]));
m_rnsPorts[i] = 53;
log(LOG_INIT,"dns: Using root nameserver #%"INT32" %s.",
i,iptoa(m_rnsIps[i]));
}
}
// . parameters can be changed on the fly so we must save Conf
bool Conf::save ( ) {
if ( ! m_save ) return true;
// always reset this before saving
bool keep = g_conf.m_testMem ;
g_conf.m_testMem = false;
//char fname[1024];
//sprintf ( fname , "%sgb.conf.saving", g_hostdb.m_dir );
// fix so if we core in malloc/free we can still save conf
char fnbuf[1024];
SafeBuf fn(fnbuf,1024);
fn.safePrintf("%sgb.conf",g_hostdb.m_dir);
bool status = g_parms.saveToXml ( (char *)this ,
fn.getBufStart(),
OBJ_CONF);
/*
if ( status ) {
char fname2[1024];
char *local = "";
if ( m_isLocal ) local = "local";
sprintf( fname2 , "%s%sgb.conf" , g_hostdb.m_dir , local );
if(access(fname2, F_OK) == 0) unlink(fname2);
if(link(fname, fname2) == 0) {
unlink(fname);
log(LOG_INFO,"admin: Saved %s.",fname2);
} else {
log(LOG_INFO,"admin: Unable to save %s:%s",
fname, strerror(errno));
}
}
*/
// restore
g_conf.m_testMem = keep;
return status;
}
// . get the default collection based on hostname
// will look for the hostname in each collection for a match
// no match defaults to default collection
char *Conf::getDefaultColl ( char *hostname, int32_t hostnameLen ) {
if ( ! m_defaultColl || ! m_defaultColl[0] )
return "main";
// just use default coll for now to keep things simple
return m_defaultColl;
/*
// return defaultColl for empty hostname
if (!hostname || hostnameLen <= 0)
return m_defaultColl;
// check each coll for the hostname
int32_t numRecs = g_collectiondb.getNumRecs();
collnum_t currCollnum = g_collectiondb.getFirstCollnum();
for ( int32_t i = 0; i < numRecs; i++ ) {
// get the collection name
char *coll = g_collectiondb.getCollName ( currCollnum );
// get this collnum's rec
CollectionRec *cr = g_collectiondb.getRec ( coll );
// loop through 3 possible hostnames
for ( int32_t h = 0; h < 3; h++ ) {
char *cmpHostname;
switch ( h ) {
case 0: cmpHostname = cr->m_collectionHostname; break;
case 1: cmpHostname = cr->m_collectionHostname1; break;
case 2: cmpHostname = cr->m_collectionHostname2; break;
}
// . get collection hostname length, reject if 0 or
// larger than hostnameLen (impossible match)
int32_t cmpLen = gbstrlen(cmpHostname);
if ( cmpLen == 0 || cmpLen > hostnameLen )
continue;
// . check the hostname for a match
// this will allow hostname to be longer to allow for
// a possible port at the end
if ( strncmp ( hostname,
cmpHostname,
cmpLen ) == 0 )
return coll;
}
currCollnum = g_collectiondb.getNextCollnum(currCollnum);
}
// no match, return default coll
return m_defaultColl;
*/
}