Home | History | Annotate | Download | only in resolv
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <stdbool.h>
     18 #include <arpa/nameser.h>
     19 #include <string.h>
     20 
     21 #include "resolv_stats.h"
     22 #include "private/libc_logging.h"
     23 #include "isc/eventlib.h"
     24 
     25 #define DBG 0
     26 
     27 /* Calculate the round-trip-time from start time t0 and end time t1. */
     28 int
     29 _res_stats_calculate_rtt(const struct timespec* t1, const struct timespec* t0) {
     30     // Divide ns by one million to get ms, multiply s by thousand to get ms (obvious)
     31     long ms0 = t0->tv_sec * 1000 + t0->tv_nsec / 1000000;
     32     long ms1 = t1->tv_sec * 1000 + t1->tv_nsec / 1000000;
     33     return (int) (ms1 - ms0);
     34 }
     35 
     36 /* Create a sample for calculating server reachability statistics. */
     37 void
     38 _res_stats_set_sample(struct __res_sample* sample, time_t now, int rcode, int rtt)
     39 {
     40     if (DBG) {
     41         __libc_format_log(ANDROID_LOG_INFO, "libc", "rcode = %d, sec = %d", rcode, rtt);
     42     }
     43     sample->at = now;
     44     sample->rcode = rcode;
     45     sample->rtt = rtt;
     46 }
     47 
     48 /* Clears all stored samples for the given server. */
     49 void
     50 _res_stats_clear_samples(struct __res_stats* stats)
     51 {
     52     stats->sample_count = stats->sample_next = 0;
     53 }
     54 
     55 /* Aggregates the reachability statistics for the given server based on on the stored samples. */
     56 void
     57 android_net_res_stats_aggregate(struct __res_stats* stats, int* successes, int* errors,
     58         int* timeouts, int* internal_errors, int* rtt_avg, time_t* last_sample_time)
     59 {
     60     int s = 0;   // successes
     61     int e = 0;   // errors
     62     int t = 0;   // timouts
     63     int ie = 0;  // internal errors
     64     long rtt_sum = 0;
     65     time_t last = 0;
     66     int rtt_count = 0;
     67     for (int i = 0 ; i < stats->sample_count ; ++i) {
     68         // Treat everything as an error that the code in send_dg() already considers a
     69         // rejection by the server, i.e. SERVFAIL, NOTIMP and REFUSED. Assume that NXDOMAIN
     70         // and NOTAUTH can actually occur for user queries. NOERROR with empty answer section
     71         // is not treated as an error here either. FORMERR seems to sometimes be returned by
     72         // some versions of BIND in response to DNSSEC or EDNS0. Whether to treat such responses
     73         // as an indication of a broken server is unclear, though. For now treat such responses,
     74         // as well as unknown codes as errors.
     75         switch (stats->samples[i].rcode) {
     76         case NOERROR:
     77         case NOTAUTH:
     78         case NXDOMAIN:
     79             ++s;
     80             rtt_sum += stats->samples[i].rtt;
     81             ++rtt_count;
     82             break;
     83         case RCODE_TIMEOUT:
     84             ++t;
     85             break;
     86         case RCODE_INTERNAL_ERROR:
     87             ++ie;
     88             break;
     89         case SERVFAIL:
     90         case NOTIMP:
     91         case REFUSED:
     92         default:
     93             ++e;
     94             break;
     95         }
     96     }
     97     *successes = s;
     98     *errors = e;
     99     *timeouts = t;
    100     *internal_errors = ie;
    101     /* If there was at least one successful sample, calculate average RTT. */
    102     if (rtt_count) {
    103         *rtt_avg = rtt_sum / rtt_count;
    104     } else {
    105         *rtt_avg = -1;
    106     }
    107     /* If we had at least one sample, populate last sample time. */
    108     if (stats->sample_count > 0) {
    109         if (stats->sample_next > 0) {
    110             last = stats->samples[stats->sample_next - 1].at;
    111         } else {
    112             last = stats->samples[stats->sample_count - 1].at;
    113         }
    114     }
    115     *last_sample_time = last;
    116 }
    117 
    118 bool
    119 _res_stats_usable_server(const struct __res_params* params, struct __res_stats* stats) {
    120     int successes = -1;
    121     int errors = -1;
    122     int timeouts = -1;
    123     int internal_errors = -1;
    124     int rtt_avg = -1;
    125     time_t last_sample_time = 0;
    126     android_net_res_stats_aggregate(stats, &successes, &errors, &timeouts, &internal_errors,
    127             &rtt_avg, &last_sample_time);
    128     if (successes >= 0 && errors >= 0 && timeouts >= 0) {
    129         int total = successes + errors + timeouts;
    130         if (DBG) {
    131             __libc_format_log(ANDROID_LOG_DEBUG, "libc", "NS stats: S %d + E %d + T %d + I %d "
    132                  "= %d, rtt = %d, min_samples = %d\n", successes, errors, timeouts, internal_errors,
    133                  total, rtt_avg, params->min_samples);
    134         }
    135         if (total >= params->min_samples && (errors > 0 || timeouts > 0)) {
    136             int success_rate = successes * 100 / total;
    137             if (DBG) {
    138                 __libc_format_log(ANDROID_LOG_DEBUG, "libc", "success rate %d%%\n", success_rate);
    139             }
    140             if (success_rate < params->success_threshold) {
    141                 // evNowTime() is used here instead of time() to stay consistent with the rest of
    142                 // the code base
    143                 time_t now = evNowTime().tv_sec;
    144                 if (now - last_sample_time > params->sample_validity) {
    145                     // Note: It might be worth considering to expire old servers after their expiry
    146                     // date has been reached, however the code for returning the ring buffer to its
    147                     // previous non-circular state would induce additional complexity.
    148                     if (DBG) {
    149                         __libc_format_log(ANDROID_LOG_INFO, "libc",
    150                             "samples stale, retrying server\n");
    151                     }
    152                     _res_stats_clear_samples(stats);
    153                 } else {
    154                     if (DBG) {
    155                         __libc_format_log(ANDROID_LOG_INFO, "libc",
    156                             "too many resolution errors, ignoring server\n");
    157                     }
    158                     return 0;
    159                 }
    160             }
    161         }
    162     }
    163     return 1;
    164 }
    165 
    166 void
    167 android_net_res_stats_get_usable_servers(const struct __res_params* params,
    168         struct __res_stats stats[], int nscount, bool usable_servers[]) {
    169     unsigned usable_servers_found = 0;
    170     for (int ns = 0; ns < nscount; ns++) {
    171         bool usable = _res_stats_usable_server(params, &stats[ns]);
    172         if (usable) {
    173             ++usable_servers_found;
    174         }
    175         usable_servers[ns] = usable;
    176     }
    177     // If there are no usable servers, consider all of them usable.
    178     // TODO: Explore other possibilities, such as enabling only the best N servers, etc.
    179     if (usable_servers_found == 0) {
    180         for (int ns = 0; ns < nscount; ns++) {
    181             usable_servers[ns] = true;
    182         }
    183     }
    184 }
    185