1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in 12 * the documentation and/or other materials provided with the 13 * distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include "resolv_cache.h" 30 #include <stdlib.h> 31 #include <string.h> 32 #include <time.h> 33 #include "pthread.h" 34 35 /* This code implements a small and *simple* DNS resolver cache. 36 * 37 * It is only used to cache DNS answers for a maximum of CONFIG_SECONDS seconds 38 * in order to reduce DNS traffic. It is not supposed to be a full DNS cache, 39 * since we plan to implement that in the future in a dedicated process running 40 * on the system. 41 * 42 * Note that its design is kept simple very intentionally, i.e.: 43 * 44 * - it takes raw DNS query packet data as input, and returns raw DNS 45 * answer packet data as output 46 * 47 * (this means that two similar queries that encode the DNS name 48 * differently will be treated distinctly). 49 * 50 * - the TTLs of answer RRs are ignored. our DNS resolver library does not use 51 * them anyway, but it means that records with a TTL smaller than 52 * CONFIG_SECONDS will be kept in the cache anyway. 53 * 54 * this is bad, but we absolutely want to avoid parsing the answer packets 55 * (and should be solved by the later full DNS cache process). 56 * 57 * - the implementation is just a (query-data) => (answer-data) hash table 58 * with a trivial least-recently-used expiration policy. 59 * 60 * Doing this keeps the code simple and avoids to deal with a lot of things 61 * that a full DNS cache is expected to do. 62 * 63 * The API is also very simple: 64 * 65 * - the client calls _resolv_cache_get() to obtain a handle to the cache. 66 * this will initialize the cache on first usage. the result can be NULL 67 * if the cache is disabled. 68 * 69 * - the client calls _resolv_cache_lookup() before performing a query 70 * 71 * if the function returns RESOLV_CACHE_FOUND, a copy of the answer data 72 * has been copied into the client-provided answer buffer. 73 * 74 * if the function returns RESOLV_CACHE_NOTFOUND, the client should perform 75 * a request normally, *then* call _resolv_cache_add() to add the received 76 * answer to the cache. 77 * 78 * if the function returns RESOLV_CACHE_UNSUPPORTED, the client should 79 * perform a request normally, and *not* call _resolv_cache_add() 80 * 81 * note that RESOLV_CACHE_UNSUPPORTED is also returned if the answer buffer 82 * is too short to accomodate the cached result. 83 * 84 * - when network settings change, the cache must be flushed since the list 85 * of DNS servers probably changed. this is done by calling 86 * _resolv_cache_reset() 87 * 88 * the parameter to this function must be an ever-increasing generation 89 * number corresponding to the current network settings state. 90 * 91 * This is done because several threads could detect the same network 92 * settings change (but at different times) and will all end up calling the 93 * same function. Comparing with the last used generation number ensures 94 * that the cache is only flushed once per network change. 95 */ 96 97 /* the name of an environment variable that will be checked the first time 98 * this code is called if its value is "0", then the resolver cache is 99 * disabled. 100 */ 101 #define CONFIG_ENV "BIONIC_DNSCACHE" 102 103 /* entries older than CONFIG_SECONDS seconds are always discarded. 104 */ 105 #define CONFIG_SECONDS (60*10) /* 10 minutes */ 106 107 /* maximum number of entries kept in the cache. This value has been 108 * determined by browsing through various sites and counting the number 109 * of corresponding requests. Keep in mind that our framework is currently 110 * performing two requests per name lookup (one for IPv4, the other for IPv6) 111 * 112 * www.google.com 4 113 * www.ysearch.com 6 114 * www.amazon.com 8 115 * www.nytimes.com 22 116 * www.espn.com 28 117 * www.msn.com 28 118 * www.lemonde.fr 35 119 * 120 * (determined in 2009-2-17 from Paris, France, results may vary depending 121 * on location) 122 * 123 * most high-level websites use lots of media/ad servers with different names 124 * but these are generally reused when browsing through the site. 125 * 126 * As such, a valud of 64 should be relatively conformtable at the moment. 127 */ 128 #define CONFIG_MAX_ENTRIES 64 129 130 /****************************************************************************/ 131 /****************************************************************************/ 132 /***** *****/ 133 /***** *****/ 134 /***** *****/ 135 /****************************************************************************/ 136 /****************************************************************************/ 137 138 /* set to 1 to debug cache operations */ 139 #define DEBUG 0 140 141 /* set to 1 to debug query data */ 142 #define DEBUG_DATA 0 143 144 #if DEBUG 145 # include <logd.h> 146 # define XLOG(...) \ 147 __libc_android_log_print(ANDROID_LOG_DEBUG,"libc",__VA_ARGS__) 148 149 #include <stdio.h> 150 #include <stdarg.h> 151 152 /** BOUNDED BUFFER FORMATTING 153 **/ 154 155 /* technical note: 156 * 157 * the following debugging routines are used to append data to a bounded 158 * buffer they take two parameters that are: 159 * 160 * - p : a pointer to the current cursor position in the buffer 161 * this value is initially set to the buffer's address. 162 * 163 * - end : the address of the buffer's limit, i.e. of the first byte 164 * after the buffer. this address should never be touched. 165 * 166 * IMPORTANT: it is assumed that end > buffer_address, i.e. 167 * that the buffer is at least one byte. 168 * 169 * the _bprint_() functions return the new value of 'p' after the data 170 * has been appended, and also ensure the following: 171 * 172 * - the returned value will never be strictly greater than 'end' 173 * 174 * - a return value equal to 'end' means that truncation occured 175 * (in which case, end[-1] will be set to 0) 176 * 177 * - after returning from a _bprint_() function, the content of the buffer 178 * is always 0-terminated, even in the event of truncation. 179 * 180 * these conventions allow you to call _bprint_ functions multiple times and 181 * only check for truncation at the end of the sequence, as in: 182 * 183 * char buff[1000], *p = buff, *end = p + sizeof(buff); 184 * 185 * p = _bprint_c(p, end, '"'); 186 * p = _bprint_s(p, end, my_string); 187 * p = _bprint_c(p, end, '"'); 188 * 189 * if (p >= end) { 190 * // buffer was too small 191 * } 192 * 193 * printf( "%s", buff ); 194 */ 195 196 /* add a char to a bounded buffer */ 197 static char* 198 _bprint_c( char* p, char* end, int c ) 199 { 200 if (p < end) { 201 if (p+1 == end) 202 *p++ = 0; 203 else { 204 *p++ = (char) c; 205 *p = 0; 206 } 207 } 208 return p; 209 } 210 211 /* add a sequence of bytes to a bounded buffer */ 212 static char* 213 _bprint_b( char* p, char* end, const char* buf, int len ) 214 { 215 int avail = end - p; 216 217 if (avail <= 0 || len <= 0) 218 return p; 219 220 if (avail > len) 221 avail = len; 222 223 memcpy( p, buf, avail ); 224 p += avail; 225 226 if (p < end) 227 p[0] = 0; 228 else 229 end[-1] = 0; 230 231 return p; 232 } 233 234 /* add a string to a bounded buffer */ 235 static char* 236 _bprint_s( char* p, char* end, const char* str ) 237 { 238 return _bprint_b(p, end, str, strlen(str)); 239 } 240 241 /* add a formatted string to a bounded buffer */ 242 static char* 243 _bprint( char* p, char* end, const char* format, ... ) 244 { 245 int avail, n; 246 va_list args; 247 248 avail = end - p; 249 250 if (avail <= 0) 251 return p; 252 253 va_start(args, format); 254 n = vsnprintf( p, avail, format, args); 255 va_end(args); 256 257 /* certain C libraries return -1 in case of truncation */ 258 if (n < 0 || n > avail) 259 n = avail; 260 261 p += n; 262 /* certain C libraries do not zero-terminate in case of truncation */ 263 if (p == end) 264 p[-1] = 0; 265 266 return p; 267 } 268 269 /* add a hex value to a bounded buffer, up to 8 digits */ 270 static char* 271 _bprint_hex( char* p, char* end, unsigned value, int numDigits ) 272 { 273 char text[sizeof(unsigned)*2]; 274 int nn = 0; 275 276 while (numDigits-- > 0) { 277 text[nn++] = "0123456789abcdef"[(value >> (numDigits*4)) & 15]; 278 } 279 return _bprint_b(p, end, text, nn); 280 } 281 282 /* add the hexadecimal dump of some memory area to a bounded buffer */ 283 static char* 284 _bprint_hexdump( char* p, char* end, const uint8_t* data, int datalen ) 285 { 286 int lineSize = 16; 287 288 while (datalen > 0) { 289 int avail = datalen; 290 int nn; 291 292 if (avail > lineSize) 293 avail = lineSize; 294 295 for (nn = 0; nn < avail; nn++) { 296 if (nn > 0) 297 p = _bprint_c(p, end, ' '); 298 p = _bprint_hex(p, end, data[nn], 2); 299 } 300 for ( ; nn < lineSize; nn++ ) { 301 p = _bprint_s(p, end, " "); 302 } 303 p = _bprint_s(p, end, " "); 304 305 for (nn = 0; nn < avail; nn++) { 306 int c = data[nn]; 307 308 if (c < 32 || c > 127) 309 c = '.'; 310 311 p = _bprint_c(p, end, c); 312 } 313 p = _bprint_c(p, end, '\n'); 314 315 data += avail; 316 datalen -= avail; 317 } 318 return p; 319 } 320 321 /* dump the content of a query of packet to the log */ 322 static void 323 XLOG_BYTES( const void* base, int len ) 324 { 325 char buff[1024]; 326 char* p = buff, *end = p + sizeof(buff); 327 328 p = _bprint_hexdump(p, end, base, len); 329 XLOG("%s",buff); 330 } 331 332 #else /* !DEBUG */ 333 # define XLOG(...) ((void)0) 334 # define XLOG_BYTES(a,b) ((void)0) 335 #endif 336 337 static time_t 338 _time_now( void ) 339 { 340 struct timeval tv; 341 342 gettimeofday( &tv, NULL ); 343 return tv.tv_sec; 344 } 345 346 /* reminder: the general format of a DNS packet is the following: 347 * 348 * HEADER (12 bytes) 349 * QUESTION (variable) 350 * ANSWER (variable) 351 * AUTHORITY (variable) 352 * ADDITIONNAL (variable) 353 * 354 * the HEADER is made of: 355 * 356 * ID : 16 : 16-bit unique query identification field 357 * 358 * QR : 1 : set to 0 for queries, and 1 for responses 359 * Opcode : 4 : set to 0 for queries 360 * AA : 1 : set to 0 for queries 361 * TC : 1 : truncation flag, will be set to 0 in queries 362 * RD : 1 : recursion desired 363 * 364 * RA : 1 : recursion available (0 in queries) 365 * Z : 3 : three reserved zero bits 366 * RCODE : 4 : response code (always 0=NOERROR in queries) 367 * 368 * QDCount: 16 : question count 369 * ANCount: 16 : Answer count (0 in queries) 370 * NSCount: 16: Authority Record count (0 in queries) 371 * ARCount: 16: Additionnal Record count (0 in queries) 372 * 373 * the QUESTION is made of QDCount Question Record (QRs) 374 * the ANSWER is made of ANCount RRs 375 * the AUTHORITY is made of NSCount RRs 376 * the ADDITIONNAL is made of ARCount RRs 377 * 378 * Each Question Record (QR) is made of: 379 * 380 * QNAME : variable : Query DNS NAME 381 * TYPE : 16 : type of query (A=1, PTR=12, MX=15, AAAA=28, ALL=255) 382 * CLASS : 16 : class of query (IN=1) 383 * 384 * Each Resource Record (RR) is made of: 385 * 386 * NAME : variable : DNS NAME 387 * TYPE : 16 : type of query (A=1, PTR=12, MX=15, AAAA=28, ALL=255) 388 * CLASS : 16 : class of query (IN=1) 389 * TTL : 32 : seconds to cache this RR (0=none) 390 * RDLENGTH: 16 : size of RDDATA in bytes 391 * RDDATA : variable : RR data (depends on TYPE) 392 * 393 * Each QNAME contains a domain name encoded as a sequence of 'labels' 394 * terminated by a zero. Each label has the following format: 395 * 396 * LEN : 8 : lenght of label (MUST be < 64) 397 * NAME : 8*LEN : label length (must exclude dots) 398 * 399 * A value of 0 in the encoding is interpreted as the 'root' domain and 400 * terminates the encoding. So 'www.android.com' will be encoded as: 401 * 402 * <3>www<7>android<3>com<0> 403 * 404 * Where <n> represents the byte with value 'n' 405 * 406 * Each NAME reflects the QNAME of the question, but has a slightly more 407 * complex encoding in order to provide message compression. This is achieved 408 * by using a 2-byte pointer, with format: 409 * 410 * TYPE : 2 : 0b11 to indicate a pointer, 0b01 and 0b10 are reserved 411 * OFFSET : 14 : offset to another part of the DNS packet 412 * 413 * The offset is relative to the start of the DNS packet and must point 414 * A pointer terminates the encoding. 415 * 416 * The NAME can be encoded in one of the following formats: 417 * 418 * - a sequence of simple labels terminated by 0 (like QNAMEs) 419 * - a single pointer 420 * - a sequence of simple labels terminated by a pointer 421 * 422 * A pointer shall always point to either a pointer of a sequence of 423 * labels (which can themselves be terminated by either a 0 or a pointer) 424 * 425 * The expanded length of a given domain name should not exceed 255 bytes. 426 * 427 * NOTE: we don't parse the answer packets, so don't need to deal with NAME 428 * records, only QNAMEs. 429 */ 430 431 #define DNS_HEADER_SIZE 12 432 433 #define DNS_TYPE_A "\00\01" /* big-endian decimal 1 */ 434 #define DNS_TYPE_PTR "\00\014" /* big-endian decimal 12 */ 435 #define DNS_TYPE_MX "\00\017" /* big-endian decimal 15 */ 436 #define DNS_TYPE_AAAA "\00\034" /* big-endian decimal 28 */ 437 #define DNS_TYPE_ALL "\00\0377" /* big-endian decimal 255 */ 438 439 #define DNS_CLASS_IN "\00\01" /* big-endian decimal 1 */ 440 441 typedef struct { 442 const uint8_t* base; 443 const uint8_t* end; 444 const uint8_t* cursor; 445 } DnsPacket; 446 447 static void 448 _dnsPacket_init( DnsPacket* packet, const uint8_t* buff, int bufflen ) 449 { 450 packet->base = buff; 451 packet->end = buff + bufflen; 452 packet->cursor = buff; 453 } 454 455 static void 456 _dnsPacket_rewind( DnsPacket* packet ) 457 { 458 packet->cursor = packet->base; 459 } 460 461 static void 462 _dnsPacket_skip( DnsPacket* packet, int count ) 463 { 464 const uint8_t* p = packet->cursor + count; 465 466 if (p > packet->end) 467 p = packet->end; 468 469 packet->cursor = p; 470 } 471 472 static int 473 _dnsPacket_readInt16( DnsPacket* packet ) 474 { 475 const uint8_t* p = packet->cursor; 476 477 if (p+2 > packet->end) 478 return -1; 479 480 packet->cursor = p+2; 481 return (p[0]<< 8) | p[1]; 482 } 483 484 /** QUERY CHECKING 485 **/ 486 487 /* check bytes in a dns packet. returns 1 on success, 0 on failure. 488 * the cursor is only advanced in the case of success 489 */ 490 static int 491 _dnsPacket_checkBytes( DnsPacket* packet, int numBytes, const void* bytes ) 492 { 493 const uint8_t* p = packet->cursor; 494 495 if (p + numBytes > packet->end) 496 return 0; 497 498 if (memcmp(p, bytes, numBytes) != 0) 499 return 0; 500 501 packet->cursor = p + numBytes; 502 return 1; 503 } 504 505 /* parse and skip a given QNAME stored in a query packet, 506 * from the current cursor position. returns 1 on success, 507 * or 0 for malformed data. 508 */ 509 static int 510 _dnsPacket_checkQName( DnsPacket* packet ) 511 { 512 const uint8_t* p = packet->cursor; 513 const uint8_t* end = packet->end; 514 515 for (;;) { 516 int c; 517 518 if (p >= end) 519 break; 520 521 c = *p++; 522 523 if (c == 0) { 524 packet->cursor = p; 525 return 1; 526 } 527 528 /* we don't expect label compression in QNAMEs */ 529 if (c >= 64) 530 break; 531 532 p += c; 533 /* we rely on the bound check at the start 534 * of the loop here */ 535 } 536 /* malformed data */ 537 XLOG("malformed QNAME"); 538 return 0; 539 } 540 541 /* parse and skip a given QR stored in a packet. 542 * returns 1 on success, and 0 on failure 543 */ 544 static int 545 _dnsPacket_checkQR( DnsPacket* packet ) 546 { 547 int len; 548 549 if (!_dnsPacket_checkQName(packet)) 550 return 0; 551 552 /* TYPE must be one of the things we support */ 553 if (!_dnsPacket_checkBytes(packet, 2, DNS_TYPE_A) && 554 !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_PTR) && 555 !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_MX) && 556 !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_AAAA) && 557 !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_ALL)) 558 { 559 XLOG("unsupported TYPE"); 560 return 0; 561 } 562 /* CLASS must be IN */ 563 if (!_dnsPacket_checkBytes(packet, 2, DNS_CLASS_IN)) { 564 XLOG("unsupported CLASS"); 565 return 0; 566 } 567 568 return 1; 569 } 570 571 /* check the header of a DNS Query packet, return 1 if it is one 572 * type of query we can cache, or 0 otherwise 573 */ 574 static int 575 _dnsPacket_checkQuery( DnsPacket* packet ) 576 { 577 const uint8_t* p = packet->base; 578 int qdCount, anCount, dnCount, arCount; 579 580 if (p + DNS_HEADER_SIZE > packet->end) { 581 XLOG("query packet too small"); 582 return 0; 583 } 584 585 /* QR must be set to 0, opcode must be 0 and AA must be 0 */ 586 /* RA, Z, and RCODE must be 0 */ 587 if ((p[2] & 0xFC) != 0 || p[3] != 0) { 588 XLOG("query packet flags unsupported"); 589 return 0; 590 } 591 592 /* Note that we ignore the TC and RD bits here for the 593 * following reasons: 594 * 595 * - there is no point for a query packet sent to a server 596 * to have the TC bit set, but the implementation might 597 * set the bit in the query buffer for its own needs 598 * between a _resolv_cache_lookup and a 599 * _resolv_cache_add. We should not freak out if this 600 * is the case. 601 * 602 * - we consider that the result from a RD=0 or a RD=1 603 * query might be different, hence that the RD bit 604 * should be used to differentiate cached result. 605 * 606 * this implies that RD is checked when hashing or 607 * comparing query packets, but not TC 608 */ 609 610 /* ANCOUNT, DNCOUNT and ARCOUNT must be 0 */ 611 qdCount = (p[4] << 8) | p[5]; 612 anCount = (p[6] << 8) | p[7]; 613 dnCount = (p[8] << 8) | p[9]; 614 arCount = (p[10]<< 8) | p[11]; 615 616 if (anCount != 0 || dnCount != 0 || arCount != 0) { 617 XLOG("query packet contains non-query records"); 618 return 0; 619 } 620 621 if (qdCount == 0) { 622 XLOG("query packet doesn't contain query record"); 623 return 0; 624 } 625 626 /* Check QDCOUNT QRs */ 627 packet->cursor = p + DNS_HEADER_SIZE; 628 629 for (;qdCount > 0; qdCount--) 630 if (!_dnsPacket_checkQR(packet)) 631 return 0; 632 633 return 1; 634 } 635 636 /** QUERY DEBUGGING 637 **/ 638 #if DEBUG 639 static char* 640 _dnsPacket_bprintQName(DnsPacket* packet, char* bp, char* bend) 641 { 642 const uint8_t* p = packet->cursor; 643 const uint8_t* end = packet->end; 644 int first = 1; 645 646 for (;;) { 647 int c; 648 649 if (p >= end) 650 break; 651 652 c = *p++; 653 654 if (c == 0) { 655 packet->cursor = p; 656 return bp; 657 } 658 659 /* we don't expect label compression in QNAMEs */ 660 if (c >= 64) 661 break; 662 663 if (first) 664 first = 0; 665 else 666 bp = _bprint_c(bp, bend, '.'); 667 668 bp = _bprint_b(bp, bend, (const char*)p, c); 669 670 p += c; 671 /* we rely on the bound check at the start 672 * of the loop here */ 673 } 674 /* malformed data */ 675 bp = _bprint_s(bp, bend, "<MALFORMED>"); 676 return bp; 677 } 678 679 static char* 680 _dnsPacket_bprintQR(DnsPacket* packet, char* p, char* end) 681 { 682 #define QQ(x) { DNS_TYPE_##x, #x } 683 static const struct { 684 const char* typeBytes; 685 const char* typeString; 686 } qTypes[] = 687 { 688 QQ(A), QQ(PTR), QQ(MX), QQ(AAAA), QQ(ALL), 689 { NULL, NULL } 690 }; 691 int nn; 692 const char* typeString = NULL; 693 694 /* dump QNAME */ 695 p = _dnsPacket_bprintQName(packet, p, end); 696 697 /* dump TYPE */ 698 p = _bprint_s(p, end, " ("); 699 700 for (nn = 0; qTypes[nn].typeBytes != NULL; nn++) { 701 if (_dnsPacket_checkBytes(packet, 2, qTypes[nn].typeBytes)) { 702 typeString = qTypes[nn].typeString; 703 break; 704 } 705 } 706 707 if (typeString != NULL) 708 p = _bprint_s(p, end, typeString); 709 else { 710 int typeCode = _dnsPacket_readInt16(packet); 711 p = _bprint(p, end, "UNKNOWN-%d", typeCode); 712 } 713 714 p = _bprint_c(p, end, ')'); 715 716 /* skip CLASS */ 717 _dnsPacket_skip(packet, 2); 718 return p; 719 } 720 721 /* this function assumes the packet has already been checked */ 722 static char* 723 _dnsPacket_bprintQuery( DnsPacket* packet, char* p, char* end ) 724 { 725 int qdCount; 726 727 if (packet->base[2] & 0x1) { 728 p = _bprint_s(p, end, "RECURSIVE "); 729 } 730 731 _dnsPacket_skip(packet, 4); 732 qdCount = _dnsPacket_readInt16(packet); 733 _dnsPacket_skip(packet, 6); 734 735 for ( ; qdCount > 0; qdCount-- ) { 736 p = _dnsPacket_bprintQR(packet, p, end); 737 } 738 return p; 739 } 740 #endif 741 742 743 /** QUERY HASHING SUPPORT 744 ** 745 ** THE FOLLOWING CODE ASSUMES THAT THE INPUT PACKET HAS ALREADY 746 ** BEEN SUCCESFULLY CHECKED. 747 **/ 748 749 /* use 32-bit FNV hash function */ 750 #define FNV_MULT 16777619U 751 #define FNV_BASIS 2166136261U 752 753 static unsigned 754 _dnsPacket_hashBytes( DnsPacket* packet, int numBytes, unsigned hash ) 755 { 756 const uint8_t* p = packet->cursor; 757 const uint8_t* end = packet->end; 758 759 while (numBytes > 0 && p < end) { 760 hash = hash*FNV_MULT ^ *p++; 761 } 762 packet->cursor = p; 763 return hash; 764 } 765 766 767 static unsigned 768 _dnsPacket_hashQName( DnsPacket* packet, unsigned hash ) 769 { 770 const uint8_t* p = packet->cursor; 771 const uint8_t* end = packet->end; 772 773 for (;;) { 774 int c; 775 776 if (p >= end) { /* should not happen */ 777 XLOG("%s: INTERNAL_ERROR: read-overflow !!\n", __FUNCTION__); 778 break; 779 } 780 781 c = *p++; 782 783 if (c == 0) 784 break; 785 786 if (c >= 64) { 787 XLOG("%s: INTERNAL_ERROR: malformed domain !!\n", __FUNCTION__); 788 break; 789 } 790 if (p + c >= end) { 791 XLOG("%s: INTERNAL_ERROR: simple label read-overflow !!\n", 792 __FUNCTION__); 793 break; 794 } 795 while (c > 0) { 796 hash = hash*FNV_MULT ^ *p++; 797 c -= 1; 798 } 799 } 800 packet->cursor = p; 801 return hash; 802 } 803 804 static unsigned 805 _dnsPacket_hashQR( DnsPacket* packet, unsigned hash ) 806 { 807 int len; 808 809 hash = _dnsPacket_hashQName(packet, hash); 810 hash = _dnsPacket_hashBytes(packet, 4, hash); /* TYPE and CLASS */ 811 return hash; 812 } 813 814 static unsigned 815 _dnsPacket_hashQuery( DnsPacket* packet ) 816 { 817 unsigned hash = FNV_BASIS; 818 int count; 819 _dnsPacket_rewind(packet); 820 821 /* we ignore the TC bit for reasons explained in 822 * _dnsPacket_checkQuery(). 823 * 824 * however we hash the RD bit to differentiate 825 * between answers for recursive and non-recursive 826 * queries. 827 */ 828 hash = hash*FNV_MULT ^ (packet->base[2] & 1); 829 830 /* assume: other flags are 0 */ 831 _dnsPacket_skip(packet, 4); 832 833 /* read QDCOUNT */ 834 count = _dnsPacket_readInt16(packet); 835 836 /* assume: ANcount, NScount, ARcount are 0 */ 837 _dnsPacket_skip(packet, 6); 838 839 /* hash QDCOUNT QRs */ 840 for ( ; count > 0; count-- ) 841 hash = _dnsPacket_hashQR(packet, hash); 842 843 return hash; 844 } 845 846 847 /** QUERY COMPARISON 848 ** 849 ** THE FOLLOWING CODE ASSUMES THAT THE INPUT PACKETS HAVE ALREADY 850 ** BEEN SUCCESFULLY CHECKED. 851 **/ 852 853 static int 854 _dnsPacket_isEqualDomainName( DnsPacket* pack1, DnsPacket* pack2 ) 855 { 856 const uint8_t* p1 = pack1->cursor; 857 const uint8_t* end1 = pack1->end; 858 const uint8_t* p2 = pack2->cursor; 859 const uint8_t* end2 = pack2->end; 860 861 for (;;) { 862 int c1, c2; 863 864 if (p1 >= end1 || p2 >= end2) { 865 XLOG("%s: INTERNAL_ERROR: read-overflow !!\n", __FUNCTION__); 866 break; 867 } 868 c1 = *p1++; 869 c2 = *p2++; 870 if (c1 != c2) 871 break; 872 873 if (c1 == 0) { 874 pack1->cursor = p1; 875 pack2->cursor = p2; 876 return 1; 877 } 878 if (c1 >= 64) { 879 XLOG("%s: INTERNAL_ERROR: malformed domain !!\n", __FUNCTION__); 880 break; 881 } 882 if ((p1+c1 > end1) || (p2+c1 > end2)) { 883 XLOG("%s: INTERNAL_ERROR: simple label read-overflow !!\n", 884 __FUNCTION__); 885 break; 886 } 887 if (memcmp(p1, p2, c1) != 0) 888 break; 889 p1 += c1; 890 p2 += c1; 891 /* we rely on the bound checks at the start of the loop */ 892 } 893 /* not the same, or one is malformed */ 894 XLOG("different DN"); 895 return 0; 896 } 897 898 static int 899 _dnsPacket_isEqualBytes( DnsPacket* pack1, DnsPacket* pack2, int numBytes ) 900 { 901 const uint8_t* p1 = pack1->cursor; 902 const uint8_t* p2 = pack2->cursor; 903 904 if ( p1 + numBytes > pack1->end || p2 + numBytes > pack2->end ) 905 return 0; 906 907 if ( memcmp(p1, p2, numBytes) != 0 ) 908 return 0; 909 910 pack1->cursor += numBytes; 911 pack2->cursor += numBytes; 912 return 1; 913 } 914 915 static int 916 _dnsPacket_isEqualQR( DnsPacket* pack1, DnsPacket* pack2 ) 917 { 918 /* compare domain name encoding + TYPE + CLASS */ 919 if ( !_dnsPacket_isEqualDomainName(pack1, pack2) || 920 !_dnsPacket_isEqualBytes(pack1, pack2, 2+2) ) 921 return 0; 922 923 return 1; 924 } 925 926 static int 927 _dnsPacket_isEqualQuery( DnsPacket* pack1, DnsPacket* pack2 ) 928 { 929 int count1, count2; 930 931 /* compare the headers, ignore most fields */ 932 _dnsPacket_rewind(pack1); 933 _dnsPacket_rewind(pack2); 934 935 /* compare RD, ignore TC, see comment in _dnsPacket_checkQuery */ 936 if ((pack1->base[2] & 1) != (pack2->base[2] & 1)) { 937 XLOG("different RD"); 938 return 0; 939 } 940 941 /* assume: other flags are all 0 */ 942 _dnsPacket_skip(pack1, 4); 943 _dnsPacket_skip(pack2, 4); 944 945 /* compare QDCOUNT */ 946 count1 = _dnsPacket_readInt16(pack1); 947 count2 = _dnsPacket_readInt16(pack2); 948 if (count1 != count2 || count1 < 0) { 949 XLOG("different QDCOUNT"); 950 return 0; 951 } 952 953 /* assume: ANcount, NScount and ARcount are all 0 */ 954 _dnsPacket_skip(pack1, 6); 955 _dnsPacket_skip(pack2, 6); 956 957 /* compare the QDCOUNT QRs */ 958 for ( ; count1 > 0; count1-- ) { 959 if (!_dnsPacket_isEqualQR(pack1, pack2)) { 960 XLOG("different QR"); 961 return 0; 962 } 963 } 964 return 1; 965 } 966 967 /****************************************************************************/ 968 /****************************************************************************/ 969 /***** *****/ 970 /***** *****/ 971 /***** *****/ 972 /****************************************************************************/ 973 /****************************************************************************/ 974 975 /* cache entry. for simplicity, 'hash' and 'hlink' are inlined in this 976 * structure though they are conceptually part of the hash table. 977 * 978 * similarly, mru_next and mru_prev are part of the global MRU list 979 */ 980 typedef struct Entry { 981 unsigned int hash; /* hash value */ 982 struct Entry* hlink; /* next in collision chain */ 983 struct Entry* mru_prev; 984 struct Entry* mru_next; 985 986 const uint8_t* query; 987 int querylen; 988 const uint8_t* answer; 989 int answerlen; 990 time_t when; /* time_t when entry was added to table */ 991 int id; /* for debugging purpose */ 992 } Entry; 993 994 995 static void 996 entry_free( Entry* e ) 997 { 998 /* everything is allocated in a single memory block */ 999 if (e) { 1000 free(e); 1001 } 1002 } 1003 1004 static __inline__ void 1005 entry_mru_remove( Entry* e ) 1006 { 1007 e->mru_prev->mru_next = e->mru_next; 1008 e->mru_next->mru_prev = e->mru_prev; 1009 } 1010 1011 static __inline__ void 1012 entry_mru_add( Entry* e, Entry* list ) 1013 { 1014 Entry* first = list->mru_next; 1015 1016 e->mru_next = first; 1017 e->mru_prev = list; 1018 1019 list->mru_next = e; 1020 first->mru_prev = e; 1021 } 1022 1023 /* compute the hash of a given entry, this is a hash of most 1024 * data in the query (key) */ 1025 static unsigned 1026 entry_hash( const Entry* e ) 1027 { 1028 DnsPacket pack[1]; 1029 1030 _dnsPacket_init(pack, e->query, e->querylen); 1031 return _dnsPacket_hashQuery(pack); 1032 } 1033 1034 /* initialize an Entry as a search key, this also checks the input query packet 1035 * returns 1 on success, or 0 in case of unsupported/malformed data */ 1036 static int 1037 entry_init_key( Entry* e, const void* query, int querylen ) 1038 { 1039 DnsPacket pack[1]; 1040 1041 memset(e, 0, sizeof(*e)); 1042 1043 e->query = query; 1044 e->querylen = querylen; 1045 e->hash = entry_hash(e); 1046 1047 _dnsPacket_init(pack, query, querylen); 1048 1049 return _dnsPacket_checkQuery(pack); 1050 } 1051 1052 /* allocate a new entry as a cache node */ 1053 static Entry* 1054 entry_alloc( const Entry* init, const void* answer, int answerlen ) 1055 { 1056 Entry* e; 1057 int size; 1058 1059 size = sizeof(*e) + init->querylen + answerlen; 1060 e = calloc(size, 1); 1061 if (e == NULL) 1062 return e; 1063 1064 e->hash = init->hash; 1065 e->query = (const uint8_t*)(e+1); 1066 e->querylen = init->querylen; 1067 1068 memcpy( (char*)e->query, init->query, e->querylen ); 1069 1070 e->answer = e->query + e->querylen; 1071 e->answerlen = answerlen; 1072 1073 memcpy( (char*)e->answer, answer, e->answerlen ); 1074 1075 e->when = _time_now(); 1076 1077 return e; 1078 } 1079 1080 static int 1081 entry_equals( const Entry* e1, const Entry* e2 ) 1082 { 1083 DnsPacket pack1[1], pack2[1]; 1084 1085 if (e1->querylen != e2->querylen) { 1086 return 0; 1087 } 1088 _dnsPacket_init(pack1, e1->query, e1->querylen); 1089 _dnsPacket_init(pack2, e2->query, e2->querylen); 1090 1091 return _dnsPacket_isEqualQuery(pack1, pack2); 1092 } 1093 1094 /****************************************************************************/ 1095 /****************************************************************************/ 1096 /***** *****/ 1097 /***** *****/ 1098 /***** *****/ 1099 /****************************************************************************/ 1100 /****************************************************************************/ 1101 1102 /* We use a simple hash table with external collision lists 1103 * for simplicity, the hash-table fields 'hash' and 'hlink' are 1104 * inlined in the Entry structure. 1105 */ 1106 #define MAX_HASH_ENTRIES (2*CONFIG_MAX_ENTRIES) 1107 1108 typedef struct resolv_cache { 1109 int num_entries; 1110 Entry mru_list; 1111 pthread_mutex_t lock; 1112 unsigned generation; 1113 int last_id; 1114 Entry* entries[ MAX_HASH_ENTRIES ]; 1115 } Cache; 1116 1117 1118 #define HTABLE_VALID(x) ((x) != NULL && (x) != HTABLE_DELETED) 1119 1120 static void 1121 _cache_flush_locked( Cache* cache ) 1122 { 1123 int nn; 1124 time_t now = _time_now(); 1125 1126 for (nn = 0; nn < MAX_HASH_ENTRIES; nn++) 1127 { 1128 Entry** pnode = &cache->entries[nn]; 1129 1130 while (*pnode != NULL) { 1131 Entry* node = *pnode; 1132 *pnode = node->hlink; 1133 entry_free(node); 1134 } 1135 } 1136 1137 cache->mru_list.mru_next = cache->mru_list.mru_prev = &cache->mru_list; 1138 cache->num_entries = 0; 1139 cache->last_id = 0; 1140 1141 XLOG("*************************\n" 1142 "*** DNS CACHE FLUSHED ***\n" 1143 "*************************"); 1144 } 1145 1146 struct resolv_cache* 1147 _resolv_cache_create( void ) 1148 { 1149 struct resolv_cache* cache; 1150 1151 cache = calloc(sizeof(*cache), 1); 1152 if (cache) { 1153 cache->generation = ~0U; 1154 pthread_mutex_init( &cache->lock, NULL ); 1155 cache->mru_list.mru_prev = cache->mru_list.mru_next = &cache->mru_list; 1156 XLOG("%s: cache created\n", __FUNCTION__); 1157 } 1158 return cache; 1159 } 1160 1161 1162 #if DEBUG 1163 static void 1164 _dump_query( const uint8_t* query, int querylen ) 1165 { 1166 char temp[256], *p=temp, *end=p+sizeof(temp); 1167 DnsPacket pack[1]; 1168 1169 _dnsPacket_init(pack, query, querylen); 1170 p = _dnsPacket_bprintQuery(pack, p, end); 1171 XLOG("QUERY: %s", temp); 1172 } 1173 1174 static void 1175 _cache_dump_mru( Cache* cache ) 1176 { 1177 char temp[512], *p=temp, *end=p+sizeof(temp); 1178 Entry* e; 1179 1180 p = _bprint(temp, end, "MRU LIST (%2d): ", cache->num_entries); 1181 for (e = cache->mru_list.mru_next; e != &cache->mru_list; e = e->mru_next) 1182 p = _bprint(p, end, " %d", e->id); 1183 1184 XLOG("%s", temp); 1185 } 1186 #endif 1187 1188 #if DEBUG 1189 # define XLOG_QUERY(q,len) _dump_query((q), (len)) 1190 #else 1191 # define XLOG_QUERY(q,len) ((void)0) 1192 #endif 1193 1194 /* This function tries to find a key within the hash table 1195 * In case of success, it will return a *pointer* to the hashed key. 1196 * In case of failure, it will return a *pointer* to NULL 1197 * 1198 * So, the caller must check '*result' to check for success/failure. 1199 * 1200 * The main idea is that the result can later be used directly in 1201 * calls to _resolv_cache_add or _resolv_cache_remove as the 'lookup' 1202 * parameter. This makes the code simpler and avoids re-searching 1203 * for the key position in the htable. 1204 * 1205 * The result of a lookup_p is only valid until you alter the hash 1206 * table. 1207 */ 1208 static Entry** 1209 _cache_lookup_p( Cache* cache, 1210 Entry* key ) 1211 { 1212 int index = key->hash % MAX_HASH_ENTRIES; 1213 Entry** pnode = &cache->entries[ key->hash % MAX_HASH_ENTRIES ]; 1214 1215 while (*pnode != NULL) { 1216 Entry* node = *pnode; 1217 1218 if (node == NULL) 1219 break; 1220 1221 if (node->hash == key->hash && entry_equals(node, key)) 1222 break; 1223 1224 pnode = &node->hlink; 1225 } 1226 return pnode; 1227 } 1228 1229 /* Add a new entry to the hash table. 'lookup' must be the 1230 * result of an immediate previous failed _lookup_p() call 1231 * (i.e. with *lookup == NULL), and 'e' is the pointer to the 1232 * newly created entry 1233 */ 1234 static void 1235 _cache_add_p( Cache* cache, 1236 Entry** lookup, 1237 Entry* e ) 1238 { 1239 *lookup = e; 1240 e->id = ++cache->last_id; 1241 entry_mru_add(e, &cache->mru_list); 1242 cache->num_entries += 1; 1243 1244 XLOG("%s: entry %d added (count=%d)", __FUNCTION__, 1245 e->id, cache->num_entries); 1246 } 1247 1248 /* Remove an existing entry from the hash table, 1249 * 'lookup' must be the result of an immediate previous 1250 * and succesful _lookup_p() call. 1251 */ 1252 static void 1253 _cache_remove_p( Cache* cache, 1254 Entry** lookup ) 1255 { 1256 Entry* e = *lookup; 1257 1258 XLOG("%s: entry %d removed (count=%d)", __FUNCTION__, 1259 e->id, cache->num_entries-1); 1260 1261 entry_mru_remove(e); 1262 *lookup = e->hlink; 1263 entry_free(e); 1264 cache->num_entries -= 1; 1265 } 1266 1267 /* Remove the oldest entry from the hash table. 1268 */ 1269 static void 1270 _cache_remove_oldest( Cache* cache ) 1271 { 1272 Entry* oldest = cache->mru_list.mru_prev; 1273 Entry** lookup = _cache_lookup_p(cache, oldest); 1274 1275 if (*lookup == NULL) { /* should not happen */ 1276 XLOG("%s: OLDEST NOT IN HTABLE ?", __FUNCTION__); 1277 return; 1278 } 1279 _cache_remove_p(cache, lookup); 1280 } 1281 1282 1283 ResolvCacheStatus 1284 _resolv_cache_lookup( struct resolv_cache* cache, 1285 const void* query, 1286 int querylen, 1287 void* answer, 1288 int answersize, 1289 int *answerlen ) 1290 { 1291 DnsPacket pack[1]; 1292 Entry key[1]; 1293 int index; 1294 Entry** lookup; 1295 Entry* e; 1296 time_t now; 1297 1298 ResolvCacheStatus result = RESOLV_CACHE_NOTFOUND; 1299 1300 XLOG("%s: lookup", __FUNCTION__); 1301 XLOG_QUERY(query, querylen); 1302 1303 /* we don't cache malformed queries */ 1304 if (!entry_init_key(key, query, querylen)) { 1305 XLOG("%s: unsupported query", __FUNCTION__); 1306 return RESOLV_CACHE_UNSUPPORTED; 1307 } 1308 /* lookup cache */ 1309 pthread_mutex_lock( &cache->lock ); 1310 1311 /* see the description of _lookup_p to understand this. 1312 * the function always return a non-NULL pointer. 1313 */ 1314 lookup = _cache_lookup_p(cache, key); 1315 e = *lookup; 1316 1317 if (e == NULL) { 1318 XLOG( "NOT IN CACHE"); 1319 goto Exit; 1320 } 1321 1322 now = _time_now(); 1323 1324 /* remove stale entries here */ 1325 if ( (unsigned)(now - e->when) >= CONFIG_SECONDS ) { 1326 XLOG( " NOT IN CACHE (STALE ENTRY %p DISCARDED)", *lookup ); 1327 _cache_remove_p(cache, lookup); 1328 goto Exit; 1329 } 1330 1331 *answerlen = e->answerlen; 1332 if (e->answerlen > answersize) { 1333 /* NOTE: we return UNSUPPORTED if the answer buffer is too short */ 1334 result = RESOLV_CACHE_UNSUPPORTED; 1335 XLOG(" ANSWER TOO LONG"); 1336 goto Exit; 1337 } 1338 1339 memcpy( answer, e->answer, e->answerlen ); 1340 1341 /* bump up this entry to the top of the MRU list */ 1342 if (e != cache->mru_list.mru_next) { 1343 entry_mru_remove( e ); 1344 entry_mru_add( e, &cache->mru_list ); 1345 } 1346 1347 XLOG( "FOUND IN CACHE entry=%p", e ); 1348 result = RESOLV_CACHE_FOUND; 1349 1350 Exit: 1351 pthread_mutex_unlock( &cache->lock ); 1352 return result; 1353 } 1354 1355 1356 void 1357 _resolv_cache_add( struct resolv_cache* cache, 1358 const void* query, 1359 int querylen, 1360 const void* answer, 1361 int answerlen ) 1362 { 1363 Entry key[1]; 1364 Entry* e; 1365 Entry** lookup; 1366 1367 /* don't assume that the query has already been cached 1368 */ 1369 if (!entry_init_key( key, query, querylen )) { 1370 XLOG( "%s: passed invalid query ?", __FUNCTION__); 1371 return; 1372 } 1373 1374 pthread_mutex_lock( &cache->lock ); 1375 1376 XLOG( "%s: query:", __FUNCTION__ ); 1377 XLOG_QUERY(query,querylen); 1378 #if DEBUG_DATA 1379 XLOG( "answer:"); 1380 XLOG_BYTES(answer,answerlen); 1381 #endif 1382 1383 lookup = _cache_lookup_p(cache, key); 1384 e = *lookup; 1385 1386 if (e != NULL) { /* should not happen */ 1387 XLOG("%s: ALREADY IN CACHE (%p) ? IGNORING ADD", 1388 __FUNCTION__, e); 1389 goto Exit; 1390 } 1391 1392 if (cache->num_entries >= CONFIG_MAX_ENTRIES) { 1393 _cache_remove_oldest(cache); 1394 /* need to lookup again */ 1395 lookup = _cache_lookup_p(cache, key); 1396 e = *lookup; 1397 if (e != NULL) { 1398 XLOG("%s: ALREADY IN CACHE (%p) ? IGNORING ADD", 1399 __FUNCTION__, e); 1400 goto Exit; 1401 } 1402 } 1403 1404 e = entry_alloc( key, answer, answerlen ); 1405 if (e != NULL) { 1406 _cache_add_p(cache, lookup, e); 1407 } 1408 #if DEBUG 1409 _cache_dump_mru(cache); 1410 #endif 1411 Exit: 1412 pthread_mutex_unlock( &cache->lock ); 1413 } 1414 1415 /****************************************************************************/ 1416 /****************************************************************************/ 1417 /***** *****/ 1418 /***** *****/ 1419 /***** *****/ 1420 /****************************************************************************/ 1421 /****************************************************************************/ 1422 1423 static struct resolv_cache* _res_cache; 1424 static pthread_once_t _res_cache_once; 1425 1426 static void 1427 _res_cache_init( void ) 1428 { 1429 const char* env = getenv(CONFIG_ENV); 1430 1431 if (env && atoi(env) == 0) { 1432 /* the cache is disabled */ 1433 return; 1434 } 1435 1436 _res_cache = _resolv_cache_create(); 1437 } 1438 1439 1440 struct resolv_cache* 1441 __get_res_cache( void ) 1442 { 1443 pthread_once( &_res_cache_once, _res_cache_init ); 1444 return _res_cache; 1445 } 1446 1447 void 1448 _resolv_cache_reset( unsigned generation ) 1449 { 1450 XLOG("%s: generation=%d", __FUNCTION__, generation); 1451 1452 if (_res_cache == NULL) 1453 return; 1454 1455 pthread_mutex_lock( &_res_cache->lock ); 1456 if (_res_cache->generation != generation) { 1457 _cache_flush_locked(_res_cache); 1458 _res_cache->generation = generation; 1459 } 1460 pthread_mutex_unlock( &_res_cache->lock ); 1461 } 1462