Home | History | Annotate | Download | only in resolv
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <stdbool.h>
     18 #include <arpa/nameser.h>
     19 #include <string.h>
     20 
     21 #include <async_safe/log.h>
     22 
     23 #include "isc/eventlib.h"
     24 #include "resolv_stats.h"
     25 
/* Compile-time switch: when non-zero, the `if (DBG)` blocks in this file emit their log lines. */
#define DBG 0
     27 
     28 /* Calculate the round-trip-time from start time t0 and end time t1. */
     29 int
     30 _res_stats_calculate_rtt(const struct timespec* t1, const struct timespec* t0) {
     31     // Divide ns by one million to get ms, multiply s by thousand to get ms (obvious)
     32     long ms0 = t0->tv_sec * 1000 + t0->tv_nsec / 1000000;
     33     long ms1 = t1->tv_sec * 1000 + t1->tv_nsec / 1000000;
     34     return (int) (ms1 - ms0);
     35 }
     36 
     37 /* Create a sample for calculating server reachability statistics. */
     38 void
     39 _res_stats_set_sample(struct __res_sample* sample, time_t now, int rcode, int rtt)
     40 {
     41     if (DBG) {
     42         async_safe_format_log(ANDROID_LOG_INFO, "libc", "rcode = %d, sec = %d", rcode, rtt);
     43     }
     44     sample->at = now;
     45     sample->rcode = rcode;
     46     sample->rtt = rtt;
     47 }
     48 
     49 /* Clears all stored samples for the given server. */
     50 void
     51 _res_stats_clear_samples(struct __res_stats* stats)
     52 {
     53     stats->sample_count = stats->sample_next = 0;
     54 }
     55 
     56 /* Aggregates the reachability statistics for the given server based on on the stored samples. */
     57 void
     58 android_net_res_stats_aggregate(struct __res_stats* stats, int* successes, int* errors,
     59         int* timeouts, int* internal_errors, int* rtt_avg, time_t* last_sample_time)
     60 {
     61     int s = 0;   // successes
     62     int e = 0;   // errors
     63     int t = 0;   // timouts
     64     int ie = 0;  // internal errors
     65     long rtt_sum = 0;
     66     time_t last = 0;
     67     int rtt_count = 0;
     68     for (int i = 0 ; i < stats->sample_count ; ++i) {
     69         // Treat everything as an error that the code in send_dg() already considers a
     70         // rejection by the server, i.e. SERVFAIL, NOTIMP and REFUSED. Assume that NXDOMAIN
     71         // and NOTAUTH can actually occur for user queries. NOERROR with empty answer section
     72         // is not treated as an error here either. FORMERR seems to sometimes be returned by
     73         // some versions of BIND in response to DNSSEC or EDNS0. Whether to treat such responses
     74         // as an indication of a broken server is unclear, though. For now treat such responses,
     75         // as well as unknown codes as errors.
     76         switch (stats->samples[i].rcode) {
     77         case NOERROR:
     78         case NOTAUTH:
     79         case NXDOMAIN:
     80             ++s;
     81             rtt_sum += stats->samples[i].rtt;
     82             ++rtt_count;
     83             break;
     84         case RCODE_TIMEOUT:
     85             ++t;
     86             break;
     87         case RCODE_INTERNAL_ERROR:
     88             ++ie;
     89             break;
     90         case SERVFAIL:
     91         case NOTIMP:
     92         case REFUSED:
     93         default:
     94             ++e;
     95             break;
     96         }
     97     }
     98     *successes = s;
     99     *errors = e;
    100     *timeouts = t;
    101     *internal_errors = ie;
    102     /* If there was at least one successful sample, calculate average RTT. */
    103     if (rtt_count) {
    104         *rtt_avg = rtt_sum / rtt_count;
    105     } else {
    106         *rtt_avg = -1;
    107     }
    108     /* If we had at least one sample, populate last sample time. */
    109     if (stats->sample_count > 0) {
    110         if (stats->sample_next > 0) {
    111             last = stats->samples[stats->sample_next - 1].at;
    112         } else {
    113             last = stats->samples[stats->sample_count - 1].at;
    114         }
    115     }
    116     *last_sample_time = last;
    117 }
    118 
    119 bool
    120 _res_stats_usable_server(const struct __res_params* params, struct __res_stats* stats) {
    121     int successes = -1;
    122     int errors = -1;
    123     int timeouts = -1;
    124     int internal_errors = -1;
    125     int rtt_avg = -1;
    126     time_t last_sample_time = 0;
    127     android_net_res_stats_aggregate(stats, &successes, &errors, &timeouts, &internal_errors,
    128             &rtt_avg, &last_sample_time);
    129     if (successes >= 0 && errors >= 0 && timeouts >= 0) {
    130         int total = successes + errors + timeouts;
    131         if (DBG) {
    132             async_safe_format_log(ANDROID_LOG_DEBUG, "libc", "NS stats: S %d + E %d + T %d + I %d "
    133                  "= %d, rtt = %d, min_samples = %d\n", successes, errors, timeouts, internal_errors,
    134                  total, rtt_avg, params->min_samples);
    135         }
    136         if (total >= params->min_samples && (errors > 0 || timeouts > 0)) {
    137             int success_rate = successes * 100 / total;
    138             if (DBG) {
    139                 async_safe_format_log(ANDROID_LOG_DEBUG, "libc", "success rate %d%%\n",
    140                                       success_rate);
    141             }
    142             if (success_rate < params->success_threshold) {
    143                 // evNowTime() is used here instead of time() to stay consistent with the rest of
    144                 // the code base
    145                 time_t now = evNowTime().tv_sec;
    146                 if (now - last_sample_time > params->sample_validity) {
    147                     // Note: It might be worth considering to expire old servers after their expiry
    148                     // date has been reached, however the code for returning the ring buffer to its
    149                     // previous non-circular state would induce additional complexity.
    150                     if (DBG) {
    151                         async_safe_format_log(ANDROID_LOG_INFO, "libc",
    152                             "samples stale, retrying server\n");
    153                     }
    154                     _res_stats_clear_samples(stats);
    155                 } else {
    156                     if (DBG) {
    157                         async_safe_format_log(ANDROID_LOG_INFO, "libc",
    158                             "too many resolution errors, ignoring server\n");
    159                     }
    160                     return 0;
    161                 }
    162             }
    163         }
    164     }
    165     return 1;
    166 }
    167 
    168 void
    169 android_net_res_stats_get_usable_servers(const struct __res_params* params,
    170         struct __res_stats stats[], int nscount, bool usable_servers[]) {
    171     unsigned usable_servers_found = 0;
    172     for (int ns = 0; ns < nscount; ns++) {
    173         bool usable = _res_stats_usable_server(params, &stats[ns]);
    174         if (usable) {
    175             ++usable_servers_found;
    176         }
    177         usable_servers[ns] = usable;
    178     }
    179     // If there are no usable servers, consider all of them usable.
    180     // TODO: Explore other possibilities, such as enabling only the best N servers, etc.
    181     if (usable_servers_found == 0) {
    182         for (int ns = 0; ns < nscount; ns++) {
    183             usable_servers[ns] = true;
    184         }
    185     }
    186 }
    187