Home | History | Annotate | Download | only in libfec
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <fcntl.h>
     18 #include <stdlib.h>
     19 #include <sys/mman.h>
     20 
     21 extern "C" {
     22     #include <fec.h>
     23 }
     24 
     25 #include "fec_private.h"
     26 
     27 using rs_unique_ptr = std::unique_ptr<void, decltype(&free_rs_char)>;
     28 
     29 /* prints a hexdump of `data' using warn(...) */
     30 static void dump(const char *name, uint64_t value, const uint8_t *data,
     31         size_t size)
     32 {
     33     const int bytes_per_line = 16;
     34     char hex[bytes_per_line * 3 + 1];
     35     char prn[bytes_per_line + 1];
     36 
     37     warn("%s (%" PRIu64 ") (%zu bytes):", name ? name : "", value, size);
     38 
     39     if (!data) {
     40         warn("    (null)");
     41         return;
     42     }
     43 
     44     for (size_t n = 0; n < size; n += bytes_per_line) {
     45         memset(hex, 0, sizeof(hex));
     46         memset(prn, 0, sizeof(prn));
     47 
     48         for (size_t m = 0; m < bytes_per_line; ++m) {
     49             if (n + m < size) {
     50                 ptrdiff_t offset = &hex[m * 3] - hex;
     51                 snprintf(hex + offset, sizeof(hex) - offset, "%02x ",
     52                          data[n + m]);
     53 
     54                 if (isprint(data[n + m])) {
     55                     prn[m] = data[n + m];
     56                 } else {
     57                     prn[m] = '.';
     58                 }
     59             } else {
     60                 strcpy(&hex[m * 3], "   ");
     61             }
     62         }
     63 
     64         warn("    %04zu   %s  %s", n, hex, prn);
     65     }
     66 }
     67 
     68 /* checks if `offset' is within a corrupted block */
     69 static inline bool is_erasure(fec_handle *f, uint64_t offset,
     70         const uint8_t *data)
     71 {
     72     if (unlikely(offset >= f->data_size)) {
     73         return false;
     74     }
     75 
     76     /* ideally, we would like to know if a specific byte on this block has
     77        been corrupted, but knowing whether any of them is can be useful as
     78        well, because often the entire block is corrupted */
     79 
     80     uint64_t n = offset / FEC_BLOCKSIZE;
     81 
     82     return !verity_check_block(f, &f->verity.hash[n * SHA256_DIGEST_LENGTH],
     83                 data);
     84 }
     85 
     86 /* check if `offset' is within a block expected to contain zeros */
     87 static inline bool is_zero(fec_handle *f, uint64_t offset)
     88 {
     89     verity_info *v = &f->verity;
     90 
     91     if (!v->hash || unlikely(offset >= f->data_size)) {
     92         return false;
     93     }
     94 
     95     uint64_t hash_offset = (offset / FEC_BLOCKSIZE) * SHA256_DIGEST_LENGTH;
     96 
     97     if (unlikely(hash_offset >
     98             v->hash_data_blocks * FEC_BLOCKSIZE - SHA256_DIGEST_LENGTH)) {
     99         return false;
    100     }
    101 
    102     return !memcmp(v->zero_hash, &v->hash[hash_offset], SHA256_DIGEST_LENGTH);
    103 }
    104 
    105 /* reads and decodes a single block starting from `offset', returns the number
    106    of bytes corrected in `errors' */
    107 static int __ecc_read(fec_handle *f, void *rs, uint8_t *dest, uint64_t offset,
    108         bool use_erasures, uint8_t *ecc_data, size_t *errors)
    109 {
    110     check(offset % FEC_BLOCKSIZE == 0);
    111     ecc_info *e = &f->ecc;
    112 
    113     /* reverse interleaving: calculate the RS block that includes the requested
    114        offset */
    115     uint64_t rsb = offset - (offset / (e->rounds * FEC_BLOCKSIZE)) *
    116                         e->rounds * FEC_BLOCKSIZE;
    117     int data_index = -1;
    118     int erasures[e->rsn];
    119     int neras = 0;
    120 
    121     /* verity is required to check for erasures */
    122     check(!use_erasures || f->verity.hash);
    123 
    124     for (int i = 0; i < e->rsn; ++i) {
    125         uint64_t interleaved = fec_ecc_interleave(rsb * e->rsn + i, e->rsn,
    126                                     e->rounds);
    127 
    128         if (interleaved == offset) {
    129             data_index = i;
    130         }
    131 
    132         /* to improve our chances of correcting IO errors, initialize the
    133            buffer to zeros even if we are going to read to it later */
    134         uint8_t bbuf[FEC_BLOCKSIZE] = {0};
    135 
    136         if (likely(interleaved < e->start) && !is_zero(f, interleaved)) {
    137             /* copy raw data to reconstruct the RS block */
    138             if (!raw_pread(f, bbuf, FEC_BLOCKSIZE, interleaved)) {
    139                 warn("failed to read: %s", strerror(errno));
    140 
    141                 /* treat errors as corruption */
    142                 if (use_erasures && neras <= e->roots) {
    143                     erasures[neras++] = i;
    144                 }
    145             } else if (use_erasures && neras <= e->roots &&
    146                             is_erasure(f, interleaved, bbuf)) {
    147                 erasures[neras++] = i;
    148             }
    149         }
    150 
    151         for (int j = 0; j < FEC_BLOCKSIZE; ++j) {
    152             ecc_data[j * FEC_RSM + i] = bbuf[j];
    153         }
    154     }
    155 
    156     check(data_index >= 0);
    157 
    158     size_t nerrs = 0;
    159     uint8_t copy[FEC_RSM];
    160 
    161     for (int i = 0; i < FEC_BLOCKSIZE; ++i) {
    162         /* copy parity data */
    163         if (!raw_pread(f, &ecc_data[i * FEC_RSM + e->rsn], e->roots,
    164                 e->start + (i + rsb) * e->roots)) {
    165             error("failed to read ecc data: %s", strerror(errno));
    166             return -1;
    167         }
    168 
    169         /* for debugging decoding failures, because decode_rs_char can mangle
    170            ecc_data */
    171         if (unlikely(use_erasures)) {
    172             memcpy(copy, &ecc_data[i * FEC_RSM], FEC_RSM);
    173         }
    174 
    175         /* decode */
    176         int rc = decode_rs_char(rs, &ecc_data[i * FEC_RSM], erasures, neras);
    177 
    178         if (unlikely(rc < 0)) {
    179             if (use_erasures) {
    180                 error("RS block %" PRIu64 ": decoding failed (%d erasures)",
    181                     rsb, neras);
    182                 dump("raw RS block", rsb, copy, FEC_RSM);
    183             } else if (!f->verity.hash) {
    184                 warn("RS block %" PRIu64 ": decoding failed", rsb);
    185             } else {
    186                 debug("RS block %" PRIu64 ": decoding failed", rsb);
    187             }
    188 
    189             errno = EIO;
    190             return -1;
    191         } else if (unlikely(rc > 0)) {
    192             check(rc <= (use_erasures ? e->roots : e->roots / 2));
    193             nerrs += rc;
    194         }
    195 
    196         dest[i] = ecc_data[i * FEC_RSM + data_index];
    197     }
    198 
    199     if (nerrs) {
    200         warn("RS block %" PRIu64 ": corrected %zu errors", rsb, nerrs);
    201         *errors += nerrs;
    202     }
    203 
    204     return FEC_BLOCKSIZE;
    205 }
    206 
    207 /* initializes RS decoder and allocates memory for interleaving */
    208 static int ecc_init(fec_handle *f, rs_unique_ptr& rs,
    209         std::unique_ptr<uint8_t[]>& ecc_data)
    210 {
    211     check(f);
    212 
    213     rs.reset(init_rs_char(FEC_PARAMS(f->ecc.roots)));
    214 
    215     if (unlikely(!rs)) {
    216         error("failed to initialize RS");
    217         errno = ENOMEM;
    218         return -1;
    219     }
    220 
    221     ecc_data.reset(new (std::nothrow) uint8_t[FEC_RSM * FEC_BLOCKSIZE]);
    222 
    223     if (unlikely(!ecc_data)) {
    224         error("failed to allocate ecc buffer");
    225         errno = ENOMEM;
    226         return -1;
    227     }
    228 
    229     return 0;
    230 }
    231 
    232 /* reads `count' bytes from `offset' and corrects possible errors without
    233    erasure detection, returning the number of corrected bytes in `errors' */
    234 static ssize_t ecc_read(fec_handle *f, uint8_t *dest, size_t count,
    235         uint64_t offset, size_t *errors)
    236 {
    237     check(f);
    238     check(dest);
    239     check(offset < f->data_size);
    240     check(offset + count <= f->data_size);
    241     check(errors);
    242 
    243     debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count);
    244 
    245     rs_unique_ptr rs(NULL, free_rs_char);
    246     std::unique_ptr<uint8_t[]> ecc_data;
    247 
    248     if (ecc_init(f, rs, ecc_data) == -1) {
    249         return -1;
    250     }
    251 
    252     uint64_t curr = offset / FEC_BLOCKSIZE;
    253     size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE);
    254     size_t left = count;
    255 
    256     uint8_t data[FEC_BLOCKSIZE];
    257 
    258     while (left > 0) {
    259         /* there's no erasure detection without verity metadata */
    260         if (__ecc_read(f, rs.get(), data, curr * FEC_BLOCKSIZE, false,
    261                 ecc_data.get(), errors) == -1) {
    262             return -1;
    263         }
    264 
    265         size_t copy = FEC_BLOCKSIZE - coff;
    266 
    267         if (copy > left) {
    268             copy = left;
    269         }
    270 
    271         memcpy(dest, &data[coff], copy);
    272 
    273         dest += copy;
    274         left -= copy;
    275         coff = 0;
    276         ++curr;
    277     }
    278 
    279     return count;
    280 }
    281 
    282 /* reads `count' bytes from `offset', corrects possible errors with
    283    erasure detection, and verifies the integrity of read data using
    284    verity hash tree; returns the number of corrections in `errors' */
    285 static ssize_t verity_read(fec_handle *f, uint8_t *dest, size_t count,
    286         uint64_t offset, size_t *errors)
    287 {
    288     check(f);
    289     check(dest);
    290     check(offset < f->data_size);
    291     check(offset + count <= f->data_size);
    292     check(f->verity.hash);
    293     check(errors);
    294 
    295     debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count);
    296 
    297     rs_unique_ptr rs(NULL, free_rs_char);
    298     std::unique_ptr<uint8_t[]> ecc_data;
    299 
    300     if (f->ecc.start && ecc_init(f, rs, ecc_data) == -1) {
    301         return -1;
    302     }
    303 
    304     uint64_t curr = offset / FEC_BLOCKSIZE;
    305     size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE);
    306     size_t left = count;
    307     uint8_t data[FEC_BLOCKSIZE];
    308 
    309     uint64_t max_hash_block = (f->verity.hash_data_blocks * FEC_BLOCKSIZE -
    310                                 SHA256_DIGEST_LENGTH) / SHA256_DIGEST_LENGTH;
    311 
    312     while (left > 0) {
    313         check(curr <= max_hash_block);
    314 
    315         uint8_t *hash = &f->verity.hash[curr * SHA256_DIGEST_LENGTH];
    316         uint64_t curr_offset = curr * FEC_BLOCKSIZE;
    317 
    318         bool expect_zeros = is_zero(f, curr_offset);
    319 
    320         /* if we are in read-only mode and expect to read a zero block,
    321            skip reading and just return zeros */
    322         if (f->mode & O_RDONLY && expect_zeros) {
    323             memset(data, 0, FEC_BLOCKSIZE);
    324             goto valid;
    325         }
    326 
    327         /* copy raw data without error correction */
    328         if (!raw_pread(f, data, FEC_BLOCKSIZE, curr_offset)) {
    329             error("failed to read: %s", strerror(errno));
    330             return -1;
    331         }
    332 
    333         if (likely(verity_check_block(f, hash, data))) {
    334             goto valid;
    335         }
    336 
    337         /* we know the block is supposed to contain zeros, so return zeros
    338            instead of trying to correct it */
    339         if (expect_zeros) {
    340             memset(data, 0, FEC_BLOCKSIZE);
    341             goto corrected;
    342         }
    343 
    344         if (!f->ecc.start) {
    345             /* fatal error without ecc */
    346             error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64,
    347                 offset, offset + count, curr);
    348             return -1;
    349         } else {
    350             debug("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64,
    351                 offset, offset + count, curr);
    352         }
    353 
    354         /* try to correct without erasures first, because checking for
    355            erasure locations is slower */
    356         if (__ecc_read(f, rs.get(), data, curr_offset, false, ecc_data.get(),
    357                 errors) == FEC_BLOCKSIZE &&
    358             verity_check_block(f, hash, data)) {
    359             goto corrected;
    360         }
    361 
    362         /* try to correct with erasures */
    363         if (__ecc_read(f, rs.get(), data, curr_offset, true, ecc_data.get(),
    364                 errors) == FEC_BLOCKSIZE &&
    365             verity_check_block(f, hash, data)) {
    366             goto corrected;
    367         }
    368 
    369         error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64
    370             " (offset %" PRIu64 ") cannot be recovered",
    371             offset, offset + count, curr, curr_offset);
    372         dump("decoded block", curr, data, FEC_BLOCKSIZE);
    373 
    374         errno = EIO;
    375         return -1;
    376 
    377 corrected:
    378         /* update the corrected block to the file if we are in r/w mode */
    379         if (f->mode & O_RDWR &&
    380                 !raw_pwrite(f, data, FEC_BLOCKSIZE, curr_offset)) {
    381             error("failed to write: %s", strerror(errno));
    382             return -1;
    383         }
    384 
    385 valid:
    386         size_t copy = FEC_BLOCKSIZE - coff;
    387 
    388         if (copy > left) {
    389             copy = left;
    390         }
    391 
    392         memcpy(dest, &data[coff], copy);
    393 
    394         dest += copy;
    395         left -= copy;
    396         coff = 0;
    397         ++curr;
    398     }
    399 
    400     return count;
    401 }
    402 
    403 /* sets the internal file position to `offset' relative to `whence' */
    404 int fec_seek(struct fec_handle *f, int64_t offset, int whence)
    405 {
    406     check(f);
    407 
    408     if (whence == SEEK_SET) {
    409         if (offset < 0) {
    410             errno = EOVERFLOW;
    411             return -1;
    412         }
    413 
    414         f->pos = offset;
    415     } else if (whence == SEEK_CUR) {
    416         if (offset < 0 && f->pos < (uint64_t)-offset) {
    417             errno = EOVERFLOW;
    418             return -1;
    419         } else if (offset > 0 && (uint64_t)offset > UINT64_MAX - f->pos) {
    420             errno = EOVERFLOW;
    421             return -1;
    422         }
    423 
    424         f->pos += offset;
    425     } else if (whence == SEEK_END) {
    426         if (offset >= 0) {
    427             errno = ENXIO;
    428             return -1;
    429         } else if ((uint64_t)-offset > f->size) {
    430             errno = EOVERFLOW;
    431             return -1;
    432         }
    433 
    434         f->pos = f->size + offset;
    435     } else {
    436         errno = EINVAL;
    437         return -1;
    438     }
    439 
    440     return 0;
    441 }
    442 
    443 /* reads up to `count' bytes starting from the internal file position using
    444    error correction and integrity validation, if available */
    445 ssize_t fec_read(struct fec_handle *f, void *buf, size_t count)
    446 {
    447     ssize_t rc = fec_pread(f, buf, count, f->pos);
    448 
    449     if (rc > 0) {
    450         check(f->pos < UINT64_MAX - rc);
    451         f->pos += rc;
    452     }
    453 
    454     return rc;
    455 }
    456 
    457 /* for a file with size `max', returns the number of bytes we can read starting
    458    from `offset', up to `count' bytes */
    459 static inline size_t get_max_count(uint64_t offset, size_t count, uint64_t max)
    460 {
    461     if (offset >= max) {
    462         return 0;
    463     } else if (offset > max - count) {
    464         return (size_t)(max - offset);
    465     }
    466 
    467     return count;
    468 }
    469 
    470 /* reads `count' bytes from `f->fd' starting from `offset', and copies the
    471    data to `buf' */
    472 bool raw_pread(fec_handle *f, void *buf, size_t count, uint64_t offset)
    473 {
    474     check(f);
    475     check(buf);
    476 
    477     uint8_t *p = (uint8_t *)buf;
    478     size_t remaining = count;
    479 
    480     while (remaining > 0) {
    481         ssize_t n = TEMP_FAILURE_RETRY(pread64(f->fd, p, remaining, offset));
    482 
    483         if (n <= 0) {
    484             return false;
    485         }
    486 
    487         p += n;
    488         remaining -= n;
    489         offset += n;
    490     }
    491 
    492     return true;
    493 }
    494 
    495 /* writes `count' bytes from `buf' to `f->fd' to a file position `offset' */
    496 bool raw_pwrite(fec_handle *f, const void *buf, size_t count, uint64_t offset)
    497 {
    498     check(f);
    499     check(buf);
    500 
    501     const uint8_t *p = (const uint8_t *)buf;
    502     size_t remaining = count;
    503 
    504     while (remaining > 0) {
    505         ssize_t n = TEMP_FAILURE_RETRY(pwrite64(f->fd, p, remaining, offset));
    506 
    507         if (n <= 0) {
    508             return false;
    509         }
    510 
    511         p += n;
    512         remaining -= n;
    513         offset += n;
    514     }
    515 
    516     return true;
    517 }
    518 
    519 /* reads up to `count' bytes starting from `offset' using error correction and
    520    integrity validation, if available */
    521 ssize_t fec_pread(struct fec_handle *f, void *buf, size_t count,
    522         uint64_t offset)
    523 {
    524     check(f);
    525     check(buf);
    526 
    527     if (unlikely(offset > UINT64_MAX - count)) {
    528         errno = EOVERFLOW;
    529         return -1;
    530     }
    531 
    532     if (f->verity.hash) {
    533         return process(f, (uint8_t *)buf,
    534                     get_max_count(offset, count, f->data_size), offset,
    535                     verity_read);
    536     } else if (f->ecc.start) {
    537         check(f->ecc.start < f->size);
    538 
    539         count = get_max_count(offset, count, f->data_size);
    540         ssize_t rc = process(f, (uint8_t *)buf, count, offset, ecc_read);
    541 
    542         if (rc >= 0) {
    543             return rc;
    544         }
    545 
    546         /* return raw data if pure ecc read fails; due to interleaving
    547            the specific blocks the caller wants may still be fine */
    548     } else {
    549         count = get_max_count(offset, count, f->size);
    550     }
    551 
    552     if (raw_pread(f, buf, count, offset)) {
    553         return count;
    554     }
    555 
    556     return -1;
    557 }
    558