1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <stdbool.h> 18 #include <arpa/nameser.h> 19 #include <string.h> 20 21 #include <async_safe/log.h> 22 23 #include "isc/eventlib.h" 24 #include "resolv_stats.h" 25 26 #define DBG 0 27 28 /* Calculate the round-trip-time from start time t0 and end time t1. */ 29 int 30 _res_stats_calculate_rtt(const struct timespec* t1, const struct timespec* t0) { 31 // Divide ns by one million to get ms, multiply s by thousand to get ms (obvious) 32 long ms0 = t0->tv_sec * 1000 + t0->tv_nsec / 1000000; 33 long ms1 = t1->tv_sec * 1000 + t1->tv_nsec / 1000000; 34 return (int) (ms1 - ms0); 35 } 36 37 /* Create a sample for calculating server reachability statistics. */ 38 void 39 _res_stats_set_sample(struct __res_sample* sample, time_t now, int rcode, int rtt) 40 { 41 if (DBG) { 42 async_safe_format_log(ANDROID_LOG_INFO, "libc", "rcode = %d, sec = %d", rcode, rtt); 43 } 44 sample->at = now; 45 sample->rcode = rcode; 46 sample->rtt = rtt; 47 } 48 49 /* Clears all stored samples for the given server. */ 50 void 51 _res_stats_clear_samples(struct __res_stats* stats) 52 { 53 stats->sample_count = stats->sample_next = 0; 54 } 55 56 /* Aggregates the reachability statistics for the given server based on on the stored samples. */ 57 void 58 android_net_res_stats_aggregate(struct __res_stats* stats, int* successes, int* errors, 59 int* timeouts, int* internal_errors, int* rtt_avg, time_t* last_sample_time) 60 { 61 int s = 0; // successes 62 int e = 0; // errors 63 int t = 0; // timouts 64 int ie = 0; // internal errors 65 long rtt_sum = 0; 66 time_t last = 0; 67 int rtt_count = 0; 68 for (int i = 0 ; i < stats->sample_count ; ++i) { 69 // Treat everything as an error that the code in send_dg() already considers a 70 // rejection by the server, i.e. SERVFAIL, NOTIMP and REFUSED. Assume that NXDOMAIN 71 // and NOTAUTH can actually occur for user queries. NOERROR with empty answer section 72 // is not treated as an error here either. FORMERR seems to sometimes be returned by 73 // some versions of BIND in response to DNSSEC or EDNS0. Whether to treat such responses 74 // as an indication of a broken server is unclear, though. For now treat such responses, 75 // as well as unknown codes as errors. 76 switch (stats->samples[i].rcode) { 77 case NOERROR: 78 case NOTAUTH: 79 case NXDOMAIN: 80 ++s; 81 rtt_sum += stats->samples[i].rtt; 82 ++rtt_count; 83 break; 84 case RCODE_TIMEOUT: 85 ++t; 86 break; 87 case RCODE_INTERNAL_ERROR: 88 ++ie; 89 break; 90 case SERVFAIL: 91 case NOTIMP: 92 case REFUSED: 93 default: 94 ++e; 95 break; 96 } 97 } 98 *successes = s; 99 *errors = e; 100 *timeouts = t; 101 *internal_errors = ie; 102 /* If there was at least one successful sample, calculate average RTT. */ 103 if (rtt_count) { 104 *rtt_avg = rtt_sum / rtt_count; 105 } else { 106 *rtt_avg = -1; 107 } 108 /* If we had at least one sample, populate last sample time. */ 109 if (stats->sample_count > 0) { 110 if (stats->sample_next > 0) { 111 last = stats->samples[stats->sample_next - 1].at; 112 } else { 113 last = stats->samples[stats->sample_count - 1].at; 114 } 115 } 116 *last_sample_time = last; 117 } 118 119 bool 120 _res_stats_usable_server(const struct __res_params* params, struct __res_stats* stats) { 121 int successes = -1; 122 int errors = -1; 123 int timeouts = -1; 124 int internal_errors = -1; 125 int rtt_avg = -1; 126 time_t last_sample_time = 0; 127 android_net_res_stats_aggregate(stats, &successes, &errors, &timeouts, &internal_errors, 128 &rtt_avg, &last_sample_time); 129 if (successes >= 0 && errors >= 0 && timeouts >= 0) { 130 int total = successes + errors + timeouts; 131 if (DBG) { 132 async_safe_format_log(ANDROID_LOG_DEBUG, "libc", "NS stats: S %d + E %d + T %d + I %d " 133 "= %d, rtt = %d, min_samples = %d\n", successes, errors, timeouts, internal_errors, 134 total, rtt_avg, params->min_samples); 135 } 136 if (total >= params->min_samples && (errors > 0 || timeouts > 0)) { 137 int success_rate = successes * 100 / total; 138 if (DBG) { 139 async_safe_format_log(ANDROID_LOG_DEBUG, "libc", "success rate %d%%\n", 140 success_rate); 141 } 142 if (success_rate < params->success_threshold) { 143 // evNowTime() is used here instead of time() to stay consistent with the rest of 144 // the code base 145 time_t now = evNowTime().tv_sec; 146 if (now - last_sample_time > params->sample_validity) { 147 // Note: It might be worth considering to expire old servers after their expiry 148 // date has been reached, however the code for returning the ring buffer to its 149 // previous non-circular state would induce additional complexity. 150 if (DBG) { 151 async_safe_format_log(ANDROID_LOG_INFO, "libc", 152 "samples stale, retrying server\n"); 153 } 154 _res_stats_clear_samples(stats); 155 } else { 156 if (DBG) { 157 async_safe_format_log(ANDROID_LOG_INFO, "libc", 158 "too many resolution errors, ignoring server\n"); 159 } 160 return 0; 161 } 162 } 163 } 164 } 165 return 1; 166 } 167 168 void 169 android_net_res_stats_get_usable_servers(const struct __res_params* params, 170 struct __res_stats stats[], int nscount, bool usable_servers[]) { 171 unsigned usable_servers_found = 0; 172 for (int ns = 0; ns < nscount; ns++) { 173 bool usable = _res_stats_usable_server(params, &stats[ns]); 174 if (usable) { 175 ++usable_servers_found; 176 } 177 usable_servers[ns] = usable; 178 } 179 // If there are no usable servers, consider all of them usable. 180 // TODO: Explore other possibilities, such as enabling only the best N servers, etc. 181 if (usable_servers_found == 0) { 182 for (int ns = 0; ns < nscount; ns++) { 183 usable_servers[ns] = true; 184 } 185 } 186 } 187