1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <fcntl.h> 18 #include <stdlib.h> 19 #include <sys/mman.h> 20 21 extern "C" { 22 #include <fec.h> 23 } 24 25 #include "fec_private.h" 26 27 using rs_unique_ptr = std::unique_ptr<void, decltype(&free_rs_char)>; 28 29 /* prints a hexdump of `data' using warn(...) */ 30 static void dump(const char *name, uint64_t value, const uint8_t *data, 31 size_t size) 32 { 33 const int bytes_per_line = 16; 34 char hex[bytes_per_line * 3 + 1]; 35 char prn[bytes_per_line + 1]; 36 37 warn("%s (%" PRIu64 ") (%zu bytes):", name ? name : "", value, size); 38 39 if (!data) { 40 warn(" (null)"); 41 return; 42 } 43 44 for (size_t n = 0; n < size; n += bytes_per_line) { 45 memset(hex, 0, sizeof(hex)); 46 memset(prn, 0, sizeof(prn)); 47 48 for (size_t m = 0; m < bytes_per_line; ++m) { 49 if (n + m < size) { 50 sprintf(&hex[m * 3], "%02x ", data[n + m]); 51 52 if (isprint(data[n + m])) { 53 prn[m] = data[n + m]; 54 } else { 55 prn[m] = '.'; 56 } 57 } else { 58 strcpy(&hex[m * 3], " "); 59 } 60 } 61 62 warn(" %04zu %s %s", n, hex, prn); 63 } 64 } 65 66 /* checks if `offset' is within a corrupted block */ 67 static inline bool is_erasure(fec_handle *f, uint64_t offset, 68 const uint8_t *data) 69 { 70 if (unlikely(offset >= f->data_size)) { 71 return false; 72 } 73 74 /* ideally, we would like to know if a specific byte on this block has 75 been corrupted, but knowing whether any of them is can be useful as 76 well, because often the entire block is corrupted */ 77 78 uint64_t n = offset / FEC_BLOCKSIZE; 79 80 return !verity_check_block(f, &f->verity.hash[n * SHA256_DIGEST_LENGTH], 81 data); 82 } 83 84 /* check if `offset' is within a block expected to contain zeros */ 85 static inline bool is_zero(fec_handle *f, uint64_t offset) 86 { 87 verity_info *v = &f->verity; 88 89 if (!v->hash || unlikely(offset >= f->data_size)) { 90 return false; 91 } 92 93 uint64_t hash_offset = (offset / FEC_BLOCKSIZE) * SHA256_DIGEST_LENGTH; 94 95 if (unlikely(hash_offset > 96 v->hash_data_blocks * FEC_BLOCKSIZE - SHA256_DIGEST_LENGTH)) { 97 return false; 98 } 99 100 return !memcmp(v->zero_hash, &v->hash[hash_offset], SHA256_DIGEST_LENGTH); 101 } 102 103 /* reads and decodes a single block starting from `offset', returns the number 104 of bytes corrected in `errors' */ 105 static int __ecc_read(fec_handle *f, void *rs, uint8_t *dest, uint64_t offset, 106 bool use_erasures, uint8_t *ecc_data, size_t *errors) 107 { 108 check(offset % FEC_BLOCKSIZE == 0); 109 ecc_info *e = &f->ecc; 110 111 /* reverse interleaving: calculate the RS block that includes the requested 112 offset */ 113 uint64_t rsb = offset - (offset / (e->rounds * FEC_BLOCKSIZE)) * 114 e->rounds * FEC_BLOCKSIZE; 115 int data_index = -1; 116 int erasures[e->rsn]; 117 int neras = 0; 118 119 /* verity is required to check for erasures */ 120 check(!use_erasures || f->verity.hash); 121 122 for (int i = 0; i < e->rsn; ++i) { 123 uint64_t interleaved = fec_ecc_interleave(rsb * e->rsn + i, e->rsn, 124 e->rounds); 125 126 if (interleaved == offset) { 127 data_index = i; 128 } 129 130 /* to improve our chances of correcting IO errors, initialize the 131 buffer to zeros even if we are going to read to it later */ 132 uint8_t bbuf[FEC_BLOCKSIZE] = {0}; 133 134 if (likely(interleaved < e->start) && !is_zero(f, interleaved)) { 135 /* copy raw data to reconstruct the RS block */ 136 if (!raw_pread(f, bbuf, FEC_BLOCKSIZE, interleaved)) { 137 warn("failed to read: %s", strerror(errno)); 138 139 /* treat errors as corruption */ 140 if (use_erasures && neras <= e->roots) { 141 erasures[neras++] = i; 142 } 143 } else if (use_erasures && neras <= e->roots && 144 is_erasure(f, interleaved, bbuf)) { 145 erasures[neras++] = i; 146 } 147 } 148 149 for (int j = 0; j < FEC_BLOCKSIZE; ++j) { 150 ecc_data[j * FEC_RSM + i] = bbuf[j]; 151 } 152 } 153 154 check(data_index >= 0); 155 156 size_t nerrs = 0; 157 uint8_t copy[FEC_RSM]; 158 159 for (int i = 0; i < FEC_BLOCKSIZE; ++i) { 160 /* copy parity data */ 161 if (!raw_pread(f, &ecc_data[i * FEC_RSM + e->rsn], e->roots, 162 e->start + (i + rsb) * e->roots)) { 163 error("failed to read ecc data: %s", strerror(errno)); 164 return -1; 165 } 166 167 /* for debugging decoding failures, because decode_rs_char can mangle 168 ecc_data */ 169 if (unlikely(use_erasures)) { 170 memcpy(copy, &ecc_data[i * FEC_RSM], FEC_RSM); 171 } 172 173 /* decode */ 174 int rc = decode_rs_char(rs, &ecc_data[i * FEC_RSM], erasures, neras); 175 176 if (unlikely(rc < 0)) { 177 if (use_erasures) { 178 error("RS block %" PRIu64 ": decoding failed (%d erasures)", 179 rsb, neras); 180 dump("raw RS block", rsb, copy, FEC_RSM); 181 } else if (!f->verity.hash) { 182 warn("RS block %" PRIu64 ": decoding failed", rsb); 183 } else { 184 debug("RS block %" PRIu64 ": decoding failed", rsb); 185 } 186 187 errno = EIO; 188 return -1; 189 } else if (unlikely(rc > 0)) { 190 check(rc <= (use_erasures ? e->roots : e->roots / 2)); 191 nerrs += rc; 192 } 193 194 dest[i] = ecc_data[i * FEC_RSM + data_index]; 195 } 196 197 if (nerrs) { 198 warn("RS block %" PRIu64 ": corrected %zu errors", rsb, nerrs); 199 *errors += nerrs; 200 } 201 202 return FEC_BLOCKSIZE; 203 } 204 205 /* initializes RS decoder and allocates memory for interleaving */ 206 static int ecc_init(fec_handle *f, rs_unique_ptr& rs, 207 std::unique_ptr<uint8_t[]>& ecc_data) 208 { 209 check(f); 210 211 rs.reset(init_rs_char(FEC_PARAMS(f->ecc.roots))); 212 213 if (unlikely(!rs)) { 214 error("failed to initialize RS"); 215 errno = ENOMEM; 216 return -1; 217 } 218 219 ecc_data.reset(new (std::nothrow) uint8_t[FEC_RSM * FEC_BLOCKSIZE]); 220 221 if (unlikely(!ecc_data)) { 222 error("failed to allocate ecc buffer"); 223 errno = ENOMEM; 224 return -1; 225 } 226 227 return 0; 228 } 229 230 /* reads `count' bytes from `offset' and corrects possible errors without 231 erasure detection, returning the number of corrected bytes in `errors' */ 232 static ssize_t ecc_read(fec_handle *f, uint8_t *dest, size_t count, 233 uint64_t offset, size_t *errors) 234 { 235 check(f); 236 check(dest); 237 check(offset < f->data_size); 238 check(offset + count <= f->data_size); 239 check(errors); 240 241 debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count); 242 243 rs_unique_ptr rs(NULL, free_rs_char); 244 std::unique_ptr<uint8_t[]> ecc_data; 245 246 if (ecc_init(f, rs, ecc_data) == -1) { 247 return -1; 248 } 249 250 uint64_t curr = offset / FEC_BLOCKSIZE; 251 size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE); 252 size_t left = count; 253 254 uint8_t data[FEC_BLOCKSIZE]; 255 256 while (left > 0) { 257 /* there's no erasure detection without verity metadata */ 258 if (__ecc_read(f, rs.get(), data, curr * FEC_BLOCKSIZE, false, 259 ecc_data.get(), errors) == -1) { 260 return -1; 261 } 262 263 size_t copy = FEC_BLOCKSIZE - coff; 264 265 if (copy > left) { 266 copy = left; 267 } 268 269 memcpy(dest, &data[coff], copy); 270 271 dest += copy; 272 left -= copy; 273 coff = 0; 274 ++curr; 275 } 276 277 return count; 278 } 279 280 /* reads `count' bytes from `offset', corrects possible errors with 281 erasure detection, and verifies the integrity of read data using 282 verity hash tree; returns the number of corrections in `errors' */ 283 static ssize_t verity_read(fec_handle *f, uint8_t *dest, size_t count, 284 uint64_t offset, size_t *errors) 285 { 286 check(f); 287 check(dest); 288 check(offset < f->data_size); 289 check(offset + count <= f->data_size); 290 check(f->verity.hash); 291 check(errors); 292 293 debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count); 294 295 rs_unique_ptr rs(NULL, free_rs_char); 296 std::unique_ptr<uint8_t[]> ecc_data; 297 298 if (f->ecc.start && ecc_init(f, rs, ecc_data) == -1) { 299 return -1; 300 } 301 302 uint64_t curr = offset / FEC_BLOCKSIZE; 303 size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE); 304 size_t left = count; 305 uint8_t data[FEC_BLOCKSIZE]; 306 307 uint64_t max_hash_block = (f->verity.hash_data_blocks * FEC_BLOCKSIZE - 308 SHA256_DIGEST_LENGTH) / SHA256_DIGEST_LENGTH; 309 310 while (left > 0) { 311 check(curr <= max_hash_block); 312 313 uint8_t *hash = &f->verity.hash[curr * SHA256_DIGEST_LENGTH]; 314 uint64_t curr_offset = curr * FEC_BLOCKSIZE; 315 316 bool expect_zeros = is_zero(f, curr_offset); 317 318 /* if we are in read-only mode and expect to read a zero block, 319 skip reading and just return zeros */ 320 if (f->mode & O_RDONLY && expect_zeros) { 321 memset(data, 0, FEC_BLOCKSIZE); 322 goto valid; 323 } 324 325 /* copy raw data without error correction */ 326 if (!raw_pread(f, data, FEC_BLOCKSIZE, curr_offset)) { 327 error("failed to read: %s", strerror(errno)); 328 return -1; 329 } 330 331 if (likely(verity_check_block(f, hash, data))) { 332 goto valid; 333 } 334 335 /* we know the block is supposed to contain zeros, so return zeros 336 instead of trying to correct it */ 337 if (expect_zeros) { 338 memset(data, 0, FEC_BLOCKSIZE); 339 goto corrected; 340 } 341 342 if (!f->ecc.start) { 343 /* fatal error without ecc */ 344 error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64, 345 offset, offset + count, curr); 346 return -1; 347 } else { 348 debug("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64, 349 offset, offset + count, curr); 350 } 351 352 /* try to correct without erasures first, because checking for 353 erasure locations is slower */ 354 if (__ecc_read(f, rs.get(), data, curr_offset, false, ecc_data.get(), 355 errors) == FEC_BLOCKSIZE && 356 verity_check_block(f, hash, data)) { 357 goto corrected; 358 } 359 360 /* try to correct with erasures */ 361 if (__ecc_read(f, rs.get(), data, curr_offset, true, ecc_data.get(), 362 errors) == FEC_BLOCKSIZE && 363 verity_check_block(f, hash, data)) { 364 goto corrected; 365 } 366 367 error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64 368 " (offset %" PRIu64 ") cannot be recovered", 369 offset, offset + count, curr, curr_offset); 370 dump("decoded block", curr, data, FEC_BLOCKSIZE); 371 372 errno = EIO; 373 return -1; 374 375 corrected: 376 /* update the corrected block to the file if we are in r/w mode */ 377 if (f->mode & O_RDWR && 378 !raw_pwrite(f, data, FEC_BLOCKSIZE, curr_offset)) { 379 error("failed to write: %s", strerror(errno)); 380 return -1; 381 } 382 383 valid: 384 size_t copy = FEC_BLOCKSIZE - coff; 385 386 if (copy > left) { 387 copy = left; 388 } 389 390 memcpy(dest, &data[coff], copy); 391 392 dest += copy; 393 left -= copy; 394 coff = 0; 395 ++curr; 396 } 397 398 return count; 399 } 400 401 /* sets the internal file position to `offset' relative to `whence' */ 402 int fec_seek(struct fec_handle *f, int64_t offset, int whence) 403 { 404 check(f); 405 406 if (whence == SEEK_SET) { 407 if (offset < 0) { 408 errno = EOVERFLOW; 409 return -1; 410 } 411 412 f->pos = offset; 413 } else if (whence == SEEK_CUR) { 414 if (offset < 0 && f->pos < (uint64_t)-offset) { 415 errno = EOVERFLOW; 416 return -1; 417 } else if (offset > 0 && (uint64_t)offset > UINT64_MAX - f->pos) { 418 errno = EOVERFLOW; 419 return -1; 420 } 421 422 f->pos += offset; 423 } else if (whence == SEEK_END) { 424 if (offset >= 0) { 425 errno = ENXIO; 426 return -1; 427 } else if ((uint64_t)-offset > f->size) { 428 errno = EOVERFLOW; 429 return -1; 430 } 431 432 f->pos = f->size + offset; 433 } else { 434 errno = EINVAL; 435 return -1; 436 } 437 438 return 0; 439 } 440 441 /* reads up to `count' bytes starting from the internal file position using 442 error correction and integrity validation, if available */ 443 ssize_t fec_read(struct fec_handle *f, void *buf, size_t count) 444 { 445 ssize_t rc = fec_pread(f, buf, count, f->pos); 446 447 if (rc > 0) { 448 check(f->pos < UINT64_MAX - rc); 449 f->pos += rc; 450 } 451 452 return rc; 453 } 454 455 /* for a file with size `max', returns the number of bytes we can read starting 456 from `offset', up to `count' bytes */ 457 static inline size_t get_max_count(uint64_t offset, size_t count, uint64_t max) 458 { 459 if (offset >= max) { 460 return 0; 461 } else if (offset > max - count) { 462 return (size_t)(max - offset); 463 } 464 465 return count; 466 } 467 468 /* reads `count' bytes from `f->fd' starting from `offset', and copies the 469 data to `buf' */ 470 bool raw_pread(fec_handle *f, void *buf, size_t count, uint64_t offset) 471 { 472 check(f); 473 check(buf); 474 475 uint8_t *p = (uint8_t *)buf; 476 size_t remaining = count; 477 478 while (remaining > 0) { 479 ssize_t n = TEMP_FAILURE_RETRY(pread64(f->fd, p, remaining, offset)); 480 481 if (n <= 0) { 482 return false; 483 } 484 485 p += n; 486 remaining -= n; 487 offset += n; 488 } 489 490 return true; 491 } 492 493 /* writes `count' bytes from `buf' to `f->fd' to a file position `offset' */ 494 bool raw_pwrite(fec_handle *f, const void *buf, size_t count, uint64_t offset) 495 { 496 check(f); 497 check(buf); 498 499 const uint8_t *p = (const uint8_t *)buf; 500 size_t remaining = count; 501 502 while (remaining > 0) { 503 ssize_t n = TEMP_FAILURE_RETRY(pwrite64(f->fd, p, remaining, offset)); 504 505 if (n <= 0) { 506 return false; 507 } 508 509 p += n; 510 remaining -= n; 511 offset += n; 512 } 513 514 return true; 515 } 516 517 /* reads up to `count' bytes starting from `offset' using error correction and 518 integrity validation, if available */ 519 ssize_t fec_pread(struct fec_handle *f, void *buf, size_t count, 520 uint64_t offset) 521 { 522 check(f); 523 check(buf); 524 525 if (unlikely(offset > UINT64_MAX - count)) { 526 errno = EOVERFLOW; 527 return -1; 528 } 529 530 if (f->verity.hash) { 531 return process(f, (uint8_t *)buf, 532 get_max_count(offset, count, f->data_size), offset, 533 verity_read); 534 } else if (f->ecc.start) { 535 check(f->ecc.start < f->size); 536 537 count = get_max_count(offset, count, f->data_size); 538 ssize_t rc = process(f, (uint8_t *)buf, count, offset, ecc_read); 539 540 if (rc >= 0) { 541 return rc; 542 } 543 544 /* return raw data if pure ecc read fails; due to interleaving 545 the specific blocks the caller wants may still be fine */ 546 } else { 547 count = get_max_count(offset, count, f->size); 548 } 549 550 if (raw_pread(f, buf, count, offset)) { 551 return count; 552 } 553 554 return -1; 555 } 556