forked from privacore/open-source-search-engine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDns.cpp
2615 lines (2438 loc) · 86.8 KB
/
Dns.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#include "Dns.h"
#include "HashTableT.h"
#include "Process.h"
#include "File.h"
#include "Conf.h"
#include "Hostdb.h"
#include "Dns_internals.h"
#include "ip.h"
#include "Mem.h"
#include "Errno.h"
#include "hash.h"
#include "utf8_fast.h"
#include "gbmemcpy.h"
#include <fcntl.h>
#include <string.h>
// comment out the following line to disable DNS TLD caching
// (the getTLDIP()/setTLDIP() implementations below are compiled only when
// it is defined; otherwise getTLDIP() is a stub that always misses).
// TLD caching seems to give about 15% performance increase over not caching.
// it has been pretty thoroughly tested, but if there is a problem,
// feel free to disable it.
#define DNS_TLD_CACHE
// See section 7. RESOLVER IMPLEMENTATION in the rfc 1035
// TODO: use the canonical name as a normalization!!
// MAX_DEPTH sizes the per-depth nameserver arrays in DnsState.
#define MAX_DEPTH 18 // we can have a lot of CNAME aliases w/ akamai
#define MAX_TRIED_IPS 32 // stop after asking 32 nameservers, return timed out
// size in bytes of DnsState::m_buf. getIpOfDNS() reuses that buffer as the
// DnsState of a nested nameserver lookup and aborts at runtime unless
// LOOP_BUF_SIZE / (sizeof(DnsState) - LOOP_BUF_SIZE) is at least 3.
#define LOOP_BUF_SIZE 26100
// longest hostname getIp() accepts; longer ones fail with EHOSTNAMETOOBIG.
// DnsState::m_hostname holds this many chars plus the terminating NUL.
#define MAX_DNS_HOSTNAME_LEN 127
// use a default of 1 day (value is in seconds) for both caches
// NOTE(review): not referenced in the part of the file visible here;
// presumably applies to m_rdbCache and m_rdbCacheLocal -- confirm.
#define DNS_CACHE_MAX_AGE (60*60*24)
// Per-request state for one hostname lookup started by Dns::getIp().
// getIp() either mmalloc()s one (m_freeit = true) or is handed one by its
// caller (m_freeit = false). getIpOfDNS() passes ds->m_buf as that
// caller-supplied state when it first has to resolve a nameserver's name.
// LAYOUT NOTE: getIp() finds the enclosing state of a nested one by
// subtracting the offset of m_buf, and getIpOfDNS()'s size check assumes
// everything except m_buf is the "header", so keep m_buf as the last member.
struct DnsState {
// 96-bit hash of the hostname, set by getIp()
key96_t m_hostnameKey;
// key for lookup into s_dnstable hash table
int64_t m_tableKey;
// the Dns object that started this lookup (getIp() stores "this")
Dns *m_this ;
// opaque caller state and completion callback, as passed to getIp()
void *m_state;
void (*m_callback)(void *state, int32_t ip);
// true if getIp() allocated this state with mmalloc() and must mfree() it
bool m_freeit;
// NUL-terminated copy of the hostname being resolved
char m_hostname[MAX_DNS_HOSTNAME_LEN+1];
// . point to the replies received from dns servers
// . m_dnsNames[] should point into these reply buffers
//char *m_replyBufPtrs[6];
//int32_t m_numReplies;
// we can do a recursion up to MAX_DEPTH levels deep. sometimes the reply
// we get back is a list of ips of nameservers we need to ask.
// that can happen a few times in a row, and we have to keep track
// of the depth here. initially we set these ips to those of the
// root servers (or sometimes the local bind servers).
// m_rootTLD[d] is set to true by getIp() for depths seeded from the root
// nameservers or the TLD cache, false for the configured local servers.
bool m_rootTLD [MAX_DEPTH];
// NOTE(review): getIp() zeroes m_fallbacks[d] when seeding a depth; its
// later use is not visible in this part of the file.
int32_t m_fallbacks[MAX_DEPTH];
// nameserver ips to ask at each depth, and how many are valid
int32_t m_dnsIps [MAX_DEPTH][MAX_DNS_IPS];
int32_t m_numDnsIps[MAX_DEPTH];
int32_t m_depth; // current depth
// . use these nameservers to do the lookup
// . if not provided, they default to the root nameservers
// . the first one we ask is based on hash of hostname % m_numDns,
// if that times out, the 2nd is right after the first, etc. so we
// always stay in order.
// . m_dnsNames point into m_nameBuf, m_namePtr pts to the end of
// m_nameBuf so you can add new names to it.
// m_dnsNames are NULLified when getIPOfDNS() get their ip address
// which is then added to m_dnsIps[]
// NOTE(review): the visible getIpOfDNS() removes a name by overwriting it
// with the last entry and decrementing the count rather than NULLing it.
char *m_dnsNames [MAX_DEPTH][MAX_DNS_IPS];
int32_t m_numDnsNames [MAX_DEPTH];
// storage for nameserver names; getIp() resets m_nameBufPtr to the start
// and m_nameBufEnd to m_nameBuf + 512
char m_nameBuf [512];
char *m_nameBufPtr;
char *m_nameBufEnd;
// this holds the one and only dns request
char m_request[512];
int32_t m_requestSize;
// after we send to a nameserver add its ip to this list so we don't
// send to it again. or so we do not even add it to m_dnsIps again.
int32_t m_triedIps[MAX_TRIED_IPS];
int32_t m_numTried;
// if we have to get the ip of the dns, then we get back more dns
// that refer to that dns and we have to get the ip of those, ... etc.
// for getting the ip of a dns we cast m_buf as a DnsState and use
// that to avoid having to allocate more memory. however, we have to
// keep track of how many times we do that recursively until we run
// out of m_buf. getIpOfDNS() refuses to nest once this reaches 3.
int32_t m_loopCount;
// set to EDNSDEAD (hostname does not exist) if we encounter that
// error, however, we continue to ask other dns servers about the
// hostname because we can often uncover the ip address that way.
// but if we never do, we want to return this error, not ETIMEDOUT.
int32_t m_errno;
// we have to turn it off in some requests for some reason
// like for www.fsis.usda.gov, otherwise we get a refused to talk error
bool m_recursionDesired;
// have a total timeout function: value of getTime() when the lookup
// started; sendToNextDNS() gives up once now - m_startTime exceeds
// TIMEOUT_TOTAL
int32_t m_startTime;
// scratch space reused as the DnsState of a nested nameserver lookup
// (see getIpOfDNS()). must stay last, see the layout note above.
char m_buf[LOOP_BUF_SIZE];
};
// a global class extern'd in .h file
Dns g_dns;
// ips of nameservers that timed out on us; consulted by isTimedOut() so we
// do not ask them again for a while. initialized in Dns::init().
static RdbCache g_timedoutCache;
// monotonically increasing salt that getIp() hashes with the hostname key
// to give every nameserver lookup (dnsLookup == true) its own table key,
// so such lookups never wait in line behind each other.
static int64_t s_antiLockCount = 1LL;
// NOTE(review): named as a per-nameserver timeout in milliseconds; its use
// is not visible in this part of the file.
#define TIMEOUT_SINGLE_HOST_MS 30000
// total time budget for one lookup; sendToNextDNS() compares it against a
// getTime() delta that it logs as seconds.
#define TIMEOUT_TOTAL 90
// CallbackEntry now defined in HashTableT.cpp
// table of in-flight lookups: the head entry for a hostname is keyed by the
// low 63 bits of its hash, and waiters are chained through m_nextKey.
static HashTableT<int64_t,CallbackEntry> s_dnstable;
// TLD nameserver cache, keyed by TLDIPKey() (see getTLDIP()/setTLDIP())
static HashTableT<uint32_t,TLDIPEntry> s_TLDIPtable;
// Construct with no client port chosen yet and an empty /etc/hosts hash
// table (no slots, no key/ip arrays).
Dns::Dns() {
	m_dnsClientPort = 0;
	// nothing loaded from /etc/hosts yet
	m_numSlots = 0;
	m_keys     = NULL;
	m_ips      = NULL;
}
// Reset the udp server plus every cache and table this module owns, then
// release the /etc/hosts hash table arrays.
void Dns::reset() {
	log("db: resetting dns");

	// network and caches
	m_udpServer.reset();
	m_rdbCache.reset();
	g_timedoutCache.reset();

	// in-flight lookup table and TLD nameserver cache
	s_dnstable.reset();
	s_TLDIPtable.reset();

	m_rdbCacheLocal.reset();

	// free hash table of /etc/hosts: ips first, then keys
	if ( m_ips ) {
		mfree ( m_ips , m_numSlots*4 , "Dns");
		m_ips = NULL;
	}
	if ( m_keys ) {
		mfree ( m_keys , m_numSlots*sizeof(key96_t), "Dns");
		m_keys = NULL;
	}
	m_numSlots = 0;
}
// Initialize the DNS client: the two ip caches, the udp server used for
// sending/receiving requests, and the cache of timed-out nameservers.
// . clientPort is the local udp port to send requests from
// . per the original note the port "will be incremented if already in use";
//   that would happen inside the udp server and is not visible here
// . use 1 socket for recving and sending
// . returns false (after logging) if any component fails to initialize,
//   true otherwise
bool Dns::init ( uint16_t clientPort ) {
	// remember the client port handed to us
	m_dnsClientPort = clientPort; // g_conf.m_dnsClientPort;

	// the cache saves to disk under its dbname ("dns")
	int32_t maxMem = g_conf.m_dnsMaxCacheMem ;
	// . how many nodes in cache tree can we fit?
	// . each rec is key (12) and ip(4)
	// . nominal overhead in cache is 56, which would make 60 bytes each,
	//   but stats suggest it's less than 25 bytes each
	int32_t maxCacheNodes = maxMem / 25;
	// set the main cache
	if ( ! m_rdbCache.init ( maxMem        ,
				 4             , // fixed data size of rec
				 maxCacheNodes ,
				 "dns"         , // dbname
				 true          , // save cache to disk?
				 12            , // cachekeysize
				 -1            )) { // numPtrsMax
		log( LOG_ERROR, "dns: Cache init failed." );
		return false;
	}

	// set the small local cache
	int32_t maxMemLocal = 100000;
	if ( ! m_rdbCacheLocal.init ( maxMemLocal    ,
				      4              , // fixed data size of rec
				      maxMemLocal/25 ,
				      "dnsLocal"     , // dbname
				      true           , // save cache to disk?
				      12             , // cachekeysize
				      -1             )) { // numPtrsMax
		log( LOG_ERROR, "dns: Cache local init failed." );
		return false;
	}

	// . set the port and proto in our udpServer
	// . poll for timeouts every half second (500 milliseconds)
	if ( ! m_udpServer.init ( m_dnsClientPort,
				  &m_proto ,
				  64000    , // sock read buf
				  32000    , // sock write buf
				  500      , // polltime(.5secs)
				  500      , // maxudpslots
				  true     )) { // is dns?
		log( LOG_ERROR, "dns: Udp server init failed." );
		return false;
	}

	// innocent log msg
	log ( LOG_INIT,"dns: Sending requests on client port %" PRIu16, (uint16_t)m_dnsClientPort );
	for ( int32_t i = 0 ; i < g_conf.m_numDns ; i++ ) {
		if ( !g_conf.m_dnsIps[i] ) continue;
		char ipbuf[16];
		log ( LOG_INIT, "dns: Using nameserver %s:%i.",
		      iptoa(g_conf.m_dnsIps[i],ipbuf) , g_conf.m_dnsPorts[i] );
	}

	// . cache the dns servers' ips who timeout on us so we don't slow
	//   things down. later we can turn this into a "speed" cache, so
	//   we ask the fastest servers first.
	// . this used to sit behind a "static bool s_init" guard that was
	//   never set to true, so the cache has always been (re)initialized
	//   on every call. that is kept on purpose: Dns::reset() resets
	//   g_timedoutCache, so a later init() has to set it up again.
	// just 30k for this little guy
	int32_t maxCacheMem = 30000;
	maxCacheNodes = maxCacheMem / 25;
	// isTimedOut() queries this cache on every nameserver ip, so treat a
	// failed init like the other two caches instead of ignoring it
	if ( ! g_timedoutCache.init ( maxCacheMem   ,
				      4             , // fixed data size of rec
				      maxCacheNodes ,
				      "dnstimedout" , // dbname
				      true          , // save cache to disk?
				      12            , // cachekeysize
				      -1            )) { // numPtrsMax
		log( LOG_ERROR, "dns: Timedout cache init failed." );
		return false;
	}
	return true;
}
static bool isTimedOut(int32_t ip) {
// is this ip address in the "timed out" cache. if so,
// then do not try again for at least 1 hour
char *rec;
int32_t recSize;
int32_t maxAge = 3600; // 1 hour in seconds
key96_t k;
k.n0 = 0LL;
k.n1 = ip;
RdbCacheLock rcl(g_timedoutCache);
bool inCache = g_timedoutCache.getRecord ( (collnum_t)0 ,
k , // key
&rec ,
&recSize,
false ,//do copy?
maxAge ,
true );//inc cnt
return inCache;
}
// Copy the lower-cased TLD of ds->m_hostname (everything after its last
// '.') into "buf" as a NUL-terminated string.
// . on entry *len is the capacity of buf, on success it is the TLD length
// . returns false if the hostname has no '.' after its first character or
//   if the TLD plus its NUL does not fit in buf
// . a hostname ending in '.' yields an empty TLD (*len == 0) and true
inline bool parseTLD(DnsState* ds, char* buf, int32_t* len) {
	const char* const host = ds->m_hostname;
	const char* const end  = host + strlen(host);
	// scan backwards from the terminating NUL; host[0] itself is never
	// examined, so a leading '.' does not count
	for (const char* p = end; p > host; p--) {
		if (*p != '.')
			continue;
		const char* tld = p + 1;
		// need room for the TLD and its NUL
		if (end - tld > *len - 1)
			return false;
		*len = end - tld;
		gbmemcpy(buf, tld, *len);
		buf[*len] = '\0';
		for (char* c = buf; *c; c++)
			*c = to_lower_a(*c);
		//log(LOG_DEBUG,"dns: parseTLD found '%s'", buf);
		return true;
	}
	// no dot found
	return false;
}
// Build the 32-bit key used to index the TLD nameserver hash table.
// . buf/len is the lower-cased TLD without the leading dot
// . returns 0 ("invalid key") for an empty or negative length
// . TLDs of up to 4 bytes are packed verbatim into the key, as before
// . longer TLDs are hashed over ALL their bytes. the old code only looked
//   at the first 4 bytes, so e.g. "shop" and "shopping" (or "game" and
//   "games") shared one cache slot and one TLD could be answered with the
//   other's nameservers.
// . hashed keys always have the top bit set. a packed key made of ASCII
//   bytes never has it, so the two kinds of key cannot collide.
inline uint32_t TLDIPKey(char* buf, int32_t len) {
	if (len <= 0)
		return 0;
	if (len <= 4) {
		uint32_t key = 0;
		memcpy(&key, buf, len);
		return key;
	}
	// 32-bit FNV-1a over the whole TLD
	uint32_t h = 2166136261u;
	for (int32_t i = 0; i < len; i++) {
		h ^= (unsigned char)buf[i];
		h *= 16777619u;
	}
	return h | 0x80000000u;
}
#ifdef DNS_TLD_CACHE
// Look up the cached nameserver ips for the TLD of ds->m_hostname.
// . returns NULL on a miss: TLD could not be parsed, key is 0, no entry
//   in s_TLDIPtable, or the entry has expired
// . returns a ptr to the table entry on a hit
// . does not modify ds; the caller (getIp) copies the ips into depth 1
static const TLDIPEntry* getTLDIP(DnsState* ds) {
	//log(LOG_DEBUG, "dns: getTLDIP entry");
	char tld[64];
	int32_t tldLen = sizeof(tld);
	if (!parseTLD(ds, tld, &tldLen)) {
		log(LOG_WARN, "dns: unable to determine TLD for %s",
		    ds->m_hostname);
		return NULL;
	}

	const uint32_t key = TLDIPKey(tld, tldLen);
	if (key == 0) {
		log(LOG_WARN, "dns: getTLDIP invalid key");
		return NULL;
	}

	const TLDIPEntry* entry = s_TLDIPtable.getValuePointer(key);
	if (!entry) {
		//log(LOG_DEBUG, "dns: getTLDIP not in cache");
		return NULL;
	}

	// JAB: 2038
	// an entry whose expiry time has been reached counts as a miss
	if (entry->expiry <= time(NULL)) {
		log(LOG_DEBUG, "dns: getTLDIP expired for %s", ds->m_hostname);
		return NULL;
	}

	log(LOG_DEBUG,"dns: TLD cache hit .%s NS depth %" PRId32" for %s.",
	    tld, (int32_t) ds->m_depth, ds->m_hostname);
	return entry;
}
// Debug-log every nameserver ip stored in "tldip", labelled with "tld".
static void dumpTLDIP( const char* tld,
		       TLDIPEntry* tldip) {
	// iptoa() formats into this scratch buffer
	char ipbuf[16];
	int32_t i = 0;
	while (i < tldip->numTLDIPs) {
		log(LOG_DEBUG, "dns: .%s TLD IP %s",
		    tld, iptoa(tldip->TLDIP[i],ipbuf));
		i++;
	}
}
// Store "tldip" as the nameserver set for the TLD of ds->m_hostname.
// . stamps tldip->expiry with now + 24 hours (even if the TLD then turns
//   out to be unparsable)
// . adds a new table entry when getTLDIP() reports no live entry. note
//   that getTLDIP() also returns NULL for an expired entry, so an expired
//   entry normally takes the add path as well
// . overwrites the existing entry in place if it is found but expired
// . leaves an unexpired entry untouched
static void setTLDIP( DnsState* ds,
		      TLDIPEntry* tldip) {
	// see if it's already in cache
	const TLDIPEntry* existing = getTLDIP(ds);

	// expire TLD NS in 24 hours
	int32_t now = time(NULL);
	tldip->expiry = now + 3600 * 24;

	char tld[64];
	int32_t tldLen = sizeof(tld);
	if (!parseTLD(ds, tld, &tldLen)) {
		log(LOG_WARN, "dns: unable to determine TLD for %s",
		    ds->m_hostname);
		return;
	}

	if (existing != NULL) {
		// still fresh? then there is nothing to do
		if (existing->expiry > now) {
			//log(LOG_DEBUG, "dns: TLD cache up-to-date");
			return;
		}
		// JAB: non-const cast...
		gbmemcpy((TLDIPEntry*) existing, tldip, sizeof(TLDIPEntry));
		log(LOG_DEBUG, "dns: TLD .%s NS cache update", tld);
		dumpTLDIP(tld, tldip);
		return;
	}

	// not cached (or expired): add under this TLD's key
	uint32_t key = TLDIPKey(tld, tldLen);
	if (!s_TLDIPtable.addKey(key, *tldip)) {
		log(LOG_WARN, "dns: unable to add %s to TLD cache",
		    ds->m_hostname);
		return;
	}
	log(LOG_DEBUG, "dns: TLD .%s NS cache add", tld);
	dumpTLDIP(tld, tldip);
}
#else // DNS_TLD_CACHE
// TLD cache compiled out (DNS_TLD_CACHE undefined): every lookup is a
// miss, so getIp() always starts from depth 0.
static const TLDIPEntry* getTLDIP(DnsState* /*ds*/) {
	return NULL;
}
#endif // DNS_TLD_CACHE
// Resolve "hostname" (hostnameLen bytes, need not be NUL-terminated).
// . returns true if the call completed without blocking: either *ip holds
//   the answer or g_errno is set
// . returns false if the lookup blocked; "callback" is then called with
//   "state" and the ip once it is known, times out or errors out
// . sets *ip to 0 if none (does not exist)
// . sets *ip to -1 and sets g_errno if there was an error
// . "ds" is NULL for normal callers, in which case a DnsState is
//   allocated here. getIpOfDNS() passes a state carved out of its
//   parent's DnsState::m_buf.
// . "dnsLookup" is true when resolving a nameserver's own name; such
//   lookups never wait in line behind another lookup (deadlock avoidance)
// . "timeout" is not used by this function
bool Dns::getIp ( const char *hostname,
		  int32_t hostnameLen ,
		  int32_t *ip ,
		  void *state ,
		  void (* callback ) ( void *state , int32_t ip ) ,
		  DnsState *ds ,
		  int32_t timeout ,
		  bool dnsLookup) {
	// . don't accept large hostnames
	// . technically the limit is 255 but i'm stricter
	if ( hostnameLen > MAX_DNS_HOSTNAME_LEN ) {
		g_errno = EHOSTNAMETOOBIG;
		log("dns: Asked to get IP of hostname over %d characters long.", MAX_DNS_HOSTNAME_LEN);
		*ip=0;
		return true;
	}

	// . NUL-terminated copy for log messages
	// . the length is validated further down, so clamp it here: a
	//   negative hostnameLen must not reach gbmemcpy() or index tmp[]
	const int32_t tmpLen = ( hostnameLen > 0 ) ? hostnameLen : 0;
	char tmp[MAX_DNS_HOSTNAME_LEN+1];
	gbmemcpy ( tmp , hostname , tmpLen );
	tmp [ tmpLen ] = '\0';

	log(LOG_DEBUG, "dns: hostname '%s'", tmp);

	// assume no error
	g_errno = 0;

	// only g_dnsDistributed should be calling this, not g_dnsLocal
	if ( this != &g_dns ) { g_process.shutdownAbort(true); }

	// not thread safe
	//if ( g_threads.amThread() ) { g_process.shutdownAbort(true); }

	if ( hostnameLen <= 0 ) {
		log(LOG_LOGIC,"dns: Asked to get IP of zero length hostname.");
		*ip = 0;
		return true;
	}

	// if url is already in a.b.c.d format return that
	*ip = atoip ( hostname , hostnameLen );
	if ( *ip != 0 ) {
		log(LOG_DEBUG, "dns: IP address passed into getIp '%s'", tmp);
		return true;
	}

	// key is hash of the hostname
	key96_t hostKey96 = hash96 ( hostname , hostnameLen );

	// . is it in the /etc/hosts file?
	// . BAD: could have a key collision!! TODO: fix..
	if ( g_conf.m_useEtcHosts && isInFile ( hostKey96 , ip ) ) return true;

	// . try getting from the cache first
	// . this returns true if was in the cache and sets *ip to the ip
	// . we now cached EDNSTIMEDOUT errors for a day, so *ip can be -1
	// . TODO: watchout for key collision
	if ( isInCache ( hostKey96 , ip ) ) {
		// return 1 to indicate we got it right away in *ip
		if ( ! g_conf.m_logDebugDns ) return true;
		//char *dd = "distributed";
		//if ( this == &g_dnsLocal ) dd = "local";
		// debug msg
		char ipbuf[16];
		log(LOG_DEBUG,"dns: got ip of %s for %s in distributed cache.",
		    iptoa(*ip,ipbuf),tmp);
		return true;
	}

	// . if this hostname request is already in progress, wait for that
	//   reply to to come back rather than launching a duplicate request.
	// . each bucket in the s_dnstable hashtable is a possible head of a
	//   linked list of callback/state pairs which are waiting for that
	//   hostname's ip
	// . is the ip for this hostname already being fetched?
	// . if so, there will be a callback entry class that should match its
	//   DnsState::m_callback in there and have a key of key.n0 (see below)
	int64_t hostKey64 = hostKey96.n0 & 0x7fffffffffffffffLL;
	// never let this be zero
	if ( hostKey64 == 0 ) {
		hostKey64 = 1;
	}

	// see if we are already looking up this hostname
	CallbackEntry *ptr = s_dnstable.getValuePointer ( hostKey64 );

	// . if he has our key his hostname must match ours exactly
	// . we do not store hostnameLen in ds, so compare against strlen()
	// . a different length is just as much a collision as different
	//   bytes. the old test required the lengths to be EQUAL before it
	//   would report a mismatch, so a colliding hostname of another
	//   length silently queued up behind the wrong lookup.
	if ( ptr &&
	     ( (int32_t)strlen(ptr->m_ds->m_hostname) != hostnameLen ||
	       strncmp ( ptr->m_ds->m_hostname, hostname, hostnameLen ) != 0 ) ) {
		g_errno = EBADENGINEER;
		log(LOG_WARN, "dns: Found key collision in wait queue. host %s has "
		    "same key as %s. key=%" PRIu64".",
		    ptr->m_ds->m_hostname, tmp, hostKey64);
		//g_process.shutdownAbort(true);
		// we should just error out if this happens, it is better
		// than giving him the wrong ip, he will be retried later
		// by the spider.
		return true;
	}

	// regardless, add our "ce" to the table, but assume we are NOT first
	// in line for a hostname and use a bogus key. it doesn't matter,
	// we just need some memory to store our CallbackEntry class.
	static int64_t s_bogus = 0;

	// make a CallbackEntry class to add to a slot in the table
	CallbackEntry ce;
	ce.m_callback = callback;
	ce.m_state    = state;
	ce.m_nextKey  = 0LL; // assume we are the first for this hostname
	ce.m_ds       = NULL;
	ce.m_listSize = 0;
	ce.m_listId   = 0;

	// always inc now no matter what now so no danger of re-use
	s_bogus++;

	// if we are the first guy requesting the ip for this hostname
	// then use "hostKey" to get the slot to store "ce",
	int64_t finalKey = hostKey64 ;

	// otherwise use "s_bogus" as the key. the bogus key is just for
	// getting a slot to use to store "ce".
	if ( ptr ) {
		// let's hash it up for efficiency
		finalKey = hash64 ( (char *)&s_bogus,8);
		// never let this be 0
		if ( finalKey == 0 ) finalKey = 1LL;
		// bogus should never equal a key.n0 for any request, otherwise
		// that is a collision. to avoid this possibility keep its hi
		// bit set, and hi bit clear on the key.n0 key (hostKey). this
		// way, a waiting slot can never collide with any other slot.
		finalKey |= 0x8000000000000000LL;
	}

	// BUT if we are looking up a dns server's ip, then NEVER wait in
	// line because we could deadlock!
	if ( dnsLookup ) {
	loop:
		finalKey = hash64 ( hostKey64 , s_antiLockCount++ );
		// it is not waiting in anyone's line, so turn this bit off
		finalKey &= 0x7fffffffffffffffLL;
		// ensure not 0
		if ( finalKey == 0 ) finalKey = 1;
		// assume hostKey is not in the table, even though it
		// may be, we cannot wait in line behind it
		ptr = NULL;
		// ensure no collision, if so, s_antiLockCount will be
		// different now so hash again until we do not collide
		if ( s_dnstable.getValuePointer ( finalKey ) )
			goto loop;
	}

	// assume we have no parent
	int64_t parentKey = 0;
	// if parent, set parentKey to "hostKey", the hash of the hostname
	if ( ptr ) parentKey = hostKey64;

	// make sure we do not have a circular dependency if we are looking
	// up the ip of a dns in order to ask him the ip of what we are
	// looking up.
	// EXAMPLE:
	// 1. get ip of xyz.com
	// 2. have to ask dns1.xyz.com
	// 3. to get his ip we have to ask dns2.xyz.com
	// 4. and to get his ip we have to ask dns1.xyz.com
	// 5. which we'll see that it is already outstanding in the hashtable,
	//    i.e., it has a parent in there, and it will just wait in line
	//    never to get out of it, if it were not for the following circular
	//    dependency check:
	// example url: www.hagener-schulen.de
	int32_t loopCount = 0;
	// loopCount is how many times we've had to ask for the ip of a
	// nameserver recursively.
	if ( ds ) loopCount = ds->m_loopCount;
	// point to the current DnsState
	char *parent = (char *)ds;
	// the DnsState was built to hold a few DnsStates in it for just
	// this purpose, so we can "backup" to our "parents" and make sure
	// they did not initiate this linked list. Search for "ds2" below
	// to see where we initiate the recursion.
	while ( ptr && loopCount-- > 0 ) {
		// the recursive "ds"es occupy DnsState::m_buf of their
		// containing DnsState. go back one.
		parent -= ((char *)ds->m_buf - (char *)ds);
		// sanity check
		//if ( ((DnsState *)parent)->m_buf != (char *)ds ) {
		//	g_process.shutdownAbort(true); }
		// do we have the circular dependency?
		if ( parent == (char *)ptr->m_ds ) {
			g_errno = EBADENGINEER;
			log(LOG_DEBUG,"dns: Caught circular dependency.");
			return true;
		}
	}

	// debug msg
	log(LOG_DEBUG,"dns: Adding key %" PRIu64" from table. "
	    "parentKey=%" PRIu64" callback=%p state=%p.",
	    finalKey,parentKey,callback,state);

	// ensure "bogus" key not already present in table, otherwise,
	// addKey will just overwrite the value!!
	while ( ptr && s_dnstable.getValuePointer ( finalKey ) ) {
		log("dns: Got collision on incremental key.");
		finalKey += 1LL;
		finalKey |= 0x8000000000000000LL;
	}

	// we need to be able to add ourselves to the table so our callback
	// can get called, otherwise it is pointless. this returns false
	// and sets g_errno on error.
	int32_t slotNum = -1;
	if ( ! s_dnstable.addKey ( finalKey , ce , &slotNum ) ) {
		log("dns: Failed to add key to table: %s.",mstrerror(g_errno));
		return true;
	}

	// get the value from the slot so we can insert into linked list.
	CallbackEntry *ppp = s_dnstable.getValuePointerFromSlot ( slotNum );
	// sanity check
	if ( ppp->m_callback != callback || ppp->m_state != state ) {
		log("dns: Failed sanity check 3.");
		g_process.shutdownAbort(true);
	}

	// adding a key may have changed the parent ptr... get again just
	// in case
	if ( ptr ) {
		ptr = s_dnstable.getValuePointer ( hostKey64 );
		// sanity check - it should still be there for sure
		if ( ! ptr ) { g_process.shutdownAbort(true); }
	}

	// . insert into beginning of the linked list to avoid having to scan
	// . "ptr" is a ptr to the parent CallbackEntry, head of linked list
	if ( ptr ) {
		int64_t oldNext = ptr->m_nextKey;
		ptr->m_nextKey = finalKey;
		ppp->m_nextKey = oldNext;
		// let parent know how big its linked list is
		ptr->m_listSize++;
		// propagate the list id, it is stored in the parent node
		// so put it into us, too
		ppp->m_listId = ptr->m_listId;
		if ( g_conf.m_logDebugDns )
			log(LOG_DEBUG,"dns: Waiting in line for %s. key=%" PRIu64". "
			    "nextKey=%" PRIu64" listSize=%" PRId32" listId=%" PRId32" "
			    "numSlots=%" PRId32".",
			    tmp,finalKey,oldNext,
			    ptr->m_listSize,ptr->m_listId,
			    s_dnstable.getNumSlots());
		// ok, we block now, waiting for the initial callback
		return false;
	}

	// init our linked list size count
	ppp->m_listSize = 1;
	// we are the head of a new list, give it a fresh id
	static int32_t s_listId = 0;
	ppp->m_listId = s_listId++;

	// . make a DnsState
	// . set g_errno and return true on malloc() error
	if ( ds )
		ds->m_freeit = false;
	else {
		ds = (DnsState *) mmalloc ( sizeof(DnsState ), "Dns" );
		if ( ! ds ) {
			log("dns: Failed to allocate mem for ip lookup.");
			// debug msg
			log(LOG_DEBUG,"dns: Removing2 key %" PRIu64" from table. "
			    "parentKey=%" PRIu64" callback=%p state=%p.",
			    hostKey64,parentKey,
			    callback,state);
			s_dnstable.removeKey ( finalKey );
			return true;
		}
		ds->m_freeit = true;
		// keep track of how many times we pluck out a DnsState
		// from DnsState::m_buf.
		ds->m_loopCount = 0;
		ds->m_startTime = getTime();
	}

	// set the ce.m_ds to our dns state so if a key collides later
	// we can check DnsState::m_hostname. actually i think this is only
	// used for sanity checking now.
	ppp->m_ds = ds;

	// reset this stuff
	ds->m_numDnsIps  [0]   = 0;
	ds->m_numDnsNames[0]   = 0;
	ds->m_depth            = 0;
	ds->m_numTried         = 0;
	ds->m_nameBufPtr       = ds->m_nameBuf ;
	ds->m_nameBufEnd       = ds->m_nameBuf + 512;
	ds->m_errno            = 0;
	ds->m_recursionDesired = true;

	// debug msg
	//log("dns::getIp: %s (key=%" PRIu64") NOT in cache...",tmp,key.n0);

	// reset m_loopCount and startTime if we are just starting
	if ( callback != gotIpOfDNSWrapper ) {
		ds->m_loopCount = 0;
		ds->m_startTime = getTime();
	}

	// set caller callback info
	// hostKey96 and finalKey are basically the same thing for hostnames
	// that are NOT waiting in line. but finalKey is the lower 64 bits
	// of hostKey96, but finalKey should have its hi bit cleared to
	// indicate it is not waiting in line. Also, if looking up the IP
	// of a dns, dnsLookup is true, and finalKey is hashed with a
	// special count to give a unique hash because we can not have
	// dnsLookups waiting in line because of deadlock issues.
	// search for "dns A" below to see what i'm talking about.
	ds->m_hostnameKey = hostKey96;
	ds->m_tableKey    = finalKey;
	ds->m_this        = this;
	ds->m_state       = state;
	ds->m_callback    = callback;
	gbmemcpy ( ds->m_hostname , hostname , hostnameLen );
	ds->m_hostname [ hostnameLen ] = '\0';

	// copy the sendBuf cuz we need it in gotIp() to ensure hostnames match
	//char *copy = (char *) mdup ( msg , msgSize , "Dns" );
	//if ( ! copy ) {
	//	if ( ds->m_freeit ) mfree (ds,sizeof(DnsState),"Dns");
	//	return -1;
	//}

	// hack this for now
	//int32_t numDns = 0;
	//int32_t dnsIps[MAX_DNSIPS];

	// copy the initial nameserver ips into ds->m_dnsIps[0] (depth 0)
	if ( g_conf.m_askRootNameservers ) {
		// ROOT TLD CACHE ATTEMPT GOES HERE...
		// this will fill in depth 1 in the query,
		// if we have the nameservers cached...
		log(LOG_DEBUG,"dns: hostname %s", ds->m_hostname);
		gbmemcpy(ds->m_dnsIps[0],g_conf.m_rnsIps, g_conf.m_numRns * 4);
		ds->m_numDnsIps[0]   = g_conf.m_numRns;
		ds->m_numDnsNames[0] = 0;
		ds->m_rootTLD[0]     = true;
		ds->m_fallbacks[0]   = 0;
		// if a TLD is cached, copy it to depth 1
		const TLDIPEntry* tldip = getTLDIP(ds);
		if (tldip) {
			gbmemcpy( ds->m_dnsIps[1],
				  tldip->TLDIP,
				  tldip->numTLDIPs * sizeof(uint32_t));
			ds->m_numDnsIps[1]   = tldip->numTLDIPs;
			ds->m_numDnsNames[1] = 0;
			ds->m_rootTLD[1]     = true;
			ds->m_fallbacks[1]   = 0;
			ds->m_depth          = 1;
		}
	}
	// otherwise, use the local bind9 servers
	else {
		//gbmemcpy(ds->m_dnsIps[0],g_conf.m_dnsIps,g_conf.m_numDns * 4);
		// skip unset (zero) entries in the configured list
		int32_t numDns = 0;
		for ( int32_t i = 0; i < MAX_DNSIPS; i++ ) {
			if ( g_conf.m_dnsIps[i] == 0 ) continue;
			ds->m_dnsIps[0][numDns] = g_conf.m_dnsIps[i];
			numDns++;
		}
		ds->m_numDnsIps[0]   = numDns;
		ds->m_numDnsNames[0] = 0;
		ds->m_rootTLD[0]     = false;
		ds->m_fallbacks[0]   = 0;
	}

	// return false if we block on the reply
	//if ( ! sendToNextDNS ( ds , timeout ) ) return false;
	if ( ! sendToNextDNS ( ds ) )
		return false;

	// debug msg
	log(LOG_DEBUG,"dns: Removing3 key %" PRIu64" from table. "
	    "parentKey=%" PRIu64" callback=%p state=%p.",
	    hostKey64,parentKey,
	    callback,state);

	// if we made it here, remove from table
	s_dnstable.removeKey ( finalKey ) ;
	// should we free it
	if ( ds->m_freeit ) mfree ( ds , sizeof(DnsState) ,"Dns" );
	// ok, g_errno should still be set, return true as specified
	return true;
}
// Resolve the name of one of the nameservers listed at ds's current depth
// and append its ip to ds->m_dnsIps[depth].
// . returns false if blocked, sets g_errno and returns true otherwise
// . this is called by sendToNextDNS() when it has to get the ip of the DNS to
//   send the request to.
// . when it blocks, gotIpOfDNSWrapper() is called with "ds" as its state
// . the nested lookup uses ds->m_buf as its DnsState, so no memory is
//   allocated here
bool Dns::getIpOfDNS ( DnsState *ds ) {
	// bail if none
	if ( ds->m_numDnsNames[ds->m_depth] <= 0 ) {
		log(LOG_DEBUG, "dns: no dnsnames for '%s'",
		    ds->m_hostname);
		return true;
	}

	// use the secondary ds for doing this
	DnsState *ds2 = (DnsState *)ds->m_buf;

	// do not keep getting the ip of the ns which may require us to get
	// the ips of its ns, etc...
	if ( ds->m_loopCount >= 3 ) {
		// cache the failure (-1) for this hostname
		addToCache ( ds->m_hostnameKey , -1 );
		g_errno = EBADENGINEER;
		log(LOG_INFO,"dns: Hit too many authority redirects for %s.",
		    ds->m_hostname);
		return true;
	}

	// sanity check: m_buf must be big enough for the nesting depth
	// allowed by the m_loopCount test above
	if ( LOOP_BUF_SIZE / (sizeof(DnsState) - LOOP_BUF_SIZE) < 3 ) {
		log("dns: Increase LOOP_BUF_SIZE, %" PRId32", in Dns.h.",
		    (int32_t)LOOP_BUF_SIZE);
		g_process.shutdownAbort(true);
	}

	// increment the loop count, we can only use m_buf so many times
	// before running out of room.
	ds2->m_loopCount = ds->m_loopCount + 1;
	// set start time for timing out
	ds2->m_startTime = ds->m_startTime;

	// or if we have too many ips already, do not bother adding more
	if (ds->m_numDnsIps[ds->m_depth]>=MAX_DNS_IPS){
		log(LOG_WARN, "dns: Already have %" PRId32" ips at depth %" PRId32".",
		    (int32_t)MAX_DNS_IPS,(int32_t)ds->m_depth);
		g_errno=EBUFTOOSMALL;
		return true;
	}

	// do not do this! this will break!
	// int32_t n = ds->m_hostnameKey.n0 % numNames;
	int32_t n = 0; // first is usually ns1, usually better

 loop:
	// get the name to get the ip for
	int32_t depth       = ds->m_depth;
	int32_t numNames    = ds->m_numDnsNames[depth];
	char   *hostname    = ds->m_dnsNames[depth][n];
	int32_t hostnameLen = strlen(hostname);
	int32_t ip          = 0;

	// loop over all dnsnames in case one causes a circular dependency
	// . remove him from the array so we do not do him again (the last
	//   name is moved into his slot)
	// . actually, this is not a guarantee, so we put a circular
	//   dependency check in getIP() above
	ds->m_dnsNames   [depth][n] = ds->m_dnsNames[depth][numNames-1];
	ds->m_numDnsNames[depth]--;

	//ds->m_numTried++;
	//if (ds->m_numTried > .....

	// debug note
	log(LOG_DEBUG,"dns: Getting ip address of dns, %s for %s.",
	    hostname,ds->m_hostname);

	// . getIp() returns false if it blocked, true if it completed
	//   (with g_errno set on error)
	// . calls gotIpOfDNSWrapper with ip when it gets it or timesOut or
	//   errors out
	// . sets ip to 0 if none (does not exist)
	// . do not set a mutual exclusion lock on ip lookups of dns servers
	//   in order to avoid having two lookups locking each other up. like
	// . 1. we are getting the ip of dns A for resolve of hostname #1
	// . 2. we are getting the ip of dns B for resolve of hostname #2
	// . 3. dns A says to ask B and B says to ask A, we end up in
	//      a deadlock
	if ( !g_dns.getIp ( hostname          ,
			    hostnameLen       ,
			    &ip               ,
			    ds                , // state
			    gotIpOfDNSWrapper , // state,ip
			    ds2               ,
			    5                 , // timeout
			    true              )) { // dns lookup?
		// getIp() returned false, i.e. it blocked
		log(LOG_DEBUG, "dns: getIp blocked for '%s'", hostname);
		return false;
	}

	// if that would cause a circular dependency, try the next one
	if ( g_errno == EBADENGINEER ) {
		if ( ds->m_numDnsNames[depth] ) {
			log("dns: looping in getIpOfDns for '%s'",
			    hostname);
			goto loop;
		}
		else
			log("dns: No names left to try after %s",hostname);
	}

	// did it have an error? g_errno will be set
	// . if ip is 0 it was a does not exist
	// . if ip is -1 it is getIp()'s error marker (e.g. a cached
	//   timeout), never a nameserver address, so do not add it
	// . otherwise add it to the array of ips
	if ( ip != 0 && ip != -1 &&
	     ds->m_numDnsIps[depth] + 1 < MAX_DNS_IPS) {
		char ipbuf[16];
		if (isTimedOut(ip)) {
			log(LOG_DEBUG, "dns: Not adding [1] ip %s - timed out",
			    iptoa(ip,ipbuf));
		}
		else {
			log(LOG_DEBUG,
			    "dns: Added ip [1-%" PRId32"] %s to depth %" PRId32" for %s.",
			    ds->m_numDnsIps[depth],
			    iptoa(ip,ipbuf),(int32_t)depth,ds->m_hostname);
			ds->m_dnsIps[depth][ds->m_numDnsIps[depth]++] = ip ;
		}
	}

	// we did not block
	return true;
}
void Dns::gotIpOfDNSWrapper(void *state, int32_t ip) {
DnsState *ds = (DnsState *)state;
// log debug msg
//DnsState *ds2 = (DnsState *)ds->m_buf;
char ipbuf[16];
log(LOG_DEBUG,"dns: Got ip of dns %s for %s.",
iptoa(ip,ipbuf),ds->m_hostname);
// sanity check
if ( ds->m_numDnsIps[ds->m_depth] + 1 >= MAX_DNS_IPS ) {
log("dns: Wierd. Not enough buffer.");
g_process.shutdownAbort(true);
}
// . if ip is 0 it was a does not exist
// . add it to the array of ips
if ( ! g_errno && ip != 0 &&
ds->m_numDnsIps[ds->m_depth] + 1 < MAX_DNS_IPS) {
if (isTimedOut(ip)) {
char ipbuf[16];
log(LOG_DEBUG, "dns: Not adding [2] ip %s - timed out",
iptoa(ip,ipbuf));
}
else {
int32_t depth = ds->m_depth;
ds->m_dnsIps[depth][ds->m_numDnsIps[depth]++] = ip ;
char ipbuf[16];
log(LOG_DEBUG,
"dns: Added ip [2-%" PRId32"] %s to depth %" PRId32" for %s.",
ds->m_numDnsIps[depth],
iptoa(ip,ipbuf),(int32_t)depth,ds->m_hostname);
}
}
// disregard any g_errnos cuz we will try again
g_errno = 0;
// just return if this blocks
if ( ! g_dns.sendToNextDNS ( ds ) ) {
log(LOG_DEBUG, "dns: sendToNextDns blocking for '%s'",
ds->m_hostname);
return ;
}
// if that does not block, then we are done... we got the final ip
// or g_errno is set. so call the callbacks.
log(LOG_DEBUG, "dns: getIpOfDNSWrapper calling returnIp for '%s'",
ds->m_hostname);
returnIp ( ds , ip );
// . otherwise, we must call the callback
// . call the callback w/ state and ip if there is one
// . g_errno may be set
//if ( ds->m_callback ) ds->m_callback ( ds->m_state , ip );
// free our state holding structure
//if ( ds->m_freeit ) mfree ( ds , sizeof(DnsState) ,"Dns" );
}
// returns false if blocked, sets g_errno and returns true otherwise
bool Dns::sendToNextDNS ( DnsState *ds ) {
//log(LOG_DEBUG, "dns: sendToNextDNS depth %d", ds->m_depth);
// let's clear g_errno since caller may have set it in gotIp()
g_errno = 0;
// if we have been at this too long, give up
int32_t now = getTime();
int32_t delta = now - ds->m_startTime;
// quick fix if the system clock was changed on us
if ( delta < 0 ) ds->m_startTime = now;
//if ( delta > 100 ) ds->m_startTime = now;
if ( delta > TIMEOUT_TOTAL ) {
log(LOG_DEBUG,"dns: Timing out the request for %s. Took over "
"%" PRId32" seconds. delta=%" PRId32". now=%" PRId32".",
ds->m_hostname,(int32_t)TIMEOUT_TOTAL,delta,now);
if ( ds->m_errno ) g_errno = ds->m_errno;
else g_errno = EDNSTIMEDOUT;
return true;
}
// if we have no more room to add to tried array, we're done,
// we've tried to ask too many nameservers already
if ( ds->m_numTried >= MAX_TRIED_IPS ) {
log(LOG_INFO,"dns: Asked maximum number of name servers, "
"%" PRId32", for %s. Timing out.",(int32_t)MAX_TRIED_IPS,
ds->m_hostname);
if ( ds->m_errno ) g_errno = ds->m_errno;
else g_errno = EDNSTIMEDOUT;
return true;
}
// get the current depth. if we exhaust all nameserver ips at this
// depth we may have to decrease it until we find some nameservers
// we haven't yet asked.
int32_t depth = ds->m_depth;
top:
log(LOG_DEBUG, "dns: at 'top' for '%s'", ds->m_hostname);
int32_t n = -1;
// how many ip do we have at this depth level? save this for
// comparing down below.
int32_t numDnsIps = ds->m_numDnsIps[depth];
// each DnsState has a list of ips of the nameservers to ask
// but which one we ask first depends on this hash
if ( ds->m_numDnsIps[depth] > 0 ) {
// easy var
int32_t num = ds->m_numDnsIps[depth];
// . pick the first candidate to send to
// . this should not always be zero because picking the groupId
// and hostId to send to is now in Dns::getResponsibleHost()
// and uses key.n1 exclusively
n = ds->m_hostnameKey.n0 % num;
// conenvience ptr
int32_t *ips = ds->m_dnsIps[depth];
// save