Home | History | Annotate | Download | only in libfec
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <fcntl.h>
     18 #include <stdlib.h>
     19 #include <sys/mman.h>
     20 
     21 extern "C" {
     22     #include <fec.h>
     23 }
     24 
     25 #include "fec_private.h"
     26 
     27 using rs_unique_ptr = std::unique_ptr<void, decltype(&free_rs_char)>;
     28 
     29 /* prints a hexdump of `data' using warn(...) */
     30 static void dump(const char *name, uint64_t value, const uint8_t *data,
     31         size_t size)
     32 {
     33     const int bytes_per_line = 16;
     34     char hex[bytes_per_line * 3 + 1];
     35     char prn[bytes_per_line + 1];
     36 
     37     warn("%s (%" PRIu64 ") (%zu bytes):", name ? name : "", value, size);
     38 
     39     if (!data) {
     40         warn("    (null)");
     41         return;
     42     }
     43 
     44     for (size_t n = 0; n < size; n += bytes_per_line) {
     45         memset(hex, 0, sizeof(hex));
     46         memset(prn, 0, sizeof(prn));
     47 
     48         for (size_t m = 0; m < bytes_per_line; ++m) {
     49             if (n + m < size) {
     50                 sprintf(&hex[m * 3], "%02x ", data[n + m]);
     51 
     52                 if (isprint(data[n + m])) {
     53                     prn[m] = data[n + m];
     54                 } else {
     55                     prn[m] = '.';
     56                 }
     57             } else {
     58                 strcpy(&hex[m * 3], "   ");
     59             }
     60         }
     61 
     62         warn("    %04zu   %s  %s", n, hex, prn);
     63     }
     64 }
     65 
     66 /* checks if `offset' is within a corrupted block */
     67 static inline bool is_erasure(fec_handle *f, uint64_t offset,
     68         const uint8_t *data)
     69 {
     70     if (unlikely(offset >= f->data_size)) {
     71         return false;
     72     }
     73 
     74     /* ideally, we would like to know if a specific byte on this block has
     75        been corrupted, but knowing whether any of them is can be useful as
     76        well, because often the entire block is corrupted */
     77 
     78     uint64_t n = offset / FEC_BLOCKSIZE;
     79 
     80     return !verity_check_block(f, &f->verity.hash[n * SHA256_DIGEST_LENGTH],
     81                 data);
     82 }
     83 
     84 /* check if `offset' is within a block expected to contain zeros */
     85 static inline bool is_zero(fec_handle *f, uint64_t offset)
     86 {
     87     verity_info *v = &f->verity;
     88 
     89     if (!v->hash || unlikely(offset >= f->data_size)) {
     90         return false;
     91     }
     92 
     93     uint64_t hash_offset = (offset / FEC_BLOCKSIZE) * SHA256_DIGEST_LENGTH;
     94 
     95     if (unlikely(hash_offset >
     96             v->hash_data_blocks * FEC_BLOCKSIZE - SHA256_DIGEST_LENGTH)) {
     97         return false;
     98     }
     99 
    100     return !memcmp(v->zero_hash, &v->hash[hash_offset], SHA256_DIGEST_LENGTH);
    101 }
    102 
    103 /* reads and decodes a single block starting from `offset', returns the number
    104    of bytes corrected in `errors' */
    105 static int __ecc_read(fec_handle *f, void *rs, uint8_t *dest, uint64_t offset,
    106         bool use_erasures, uint8_t *ecc_data, size_t *errors)
    107 {
    108     check(offset % FEC_BLOCKSIZE == 0);
    109     ecc_info *e = &f->ecc;
    110 
    111     /* reverse interleaving: calculate the RS block that includes the requested
    112        offset */
    113     uint64_t rsb = offset - (offset / (e->rounds * FEC_BLOCKSIZE)) *
    114                         e->rounds * FEC_BLOCKSIZE;
    115     int data_index = -1;
    116     int erasures[e->rsn];
    117     int neras = 0;
    118 
    119     /* verity is required to check for erasures */
    120     check(!use_erasures || f->verity.hash);
    121 
    122     for (int i = 0; i < e->rsn; ++i) {
    123         uint64_t interleaved = fec_ecc_interleave(rsb * e->rsn + i, e->rsn,
    124                                     e->rounds);
    125 
    126         if (interleaved == offset) {
    127             data_index = i;
    128         }
    129 
    130         /* to improve our chances of correcting IO errors, initialize the
    131            buffer to zeros even if we are going to read to it later */
    132         uint8_t bbuf[FEC_BLOCKSIZE] = {0};
    133 
    134         if (likely(interleaved < e->start) && !is_zero(f, interleaved)) {
    135             /* copy raw data to reconstruct the RS block */
    136             if (!raw_pread(f, bbuf, FEC_BLOCKSIZE, interleaved)) {
    137                 warn("failed to read: %s", strerror(errno));
    138 
    139                 /* treat errors as corruption */
    140                 if (use_erasures && neras <= e->roots) {
    141                     erasures[neras++] = i;
    142                 }
    143             } else if (use_erasures && neras <= e->roots &&
    144                             is_erasure(f, interleaved, bbuf)) {
    145                 erasures[neras++] = i;
    146             }
    147         }
    148 
    149         for (int j = 0; j < FEC_BLOCKSIZE; ++j) {
    150             ecc_data[j * FEC_RSM + i] = bbuf[j];
    151         }
    152     }
    153 
    154     check(data_index >= 0);
    155 
    156     size_t nerrs = 0;
    157     uint8_t copy[FEC_RSM];
    158 
    159     for (int i = 0; i < FEC_BLOCKSIZE; ++i) {
    160         /* copy parity data */
    161         if (!raw_pread(f, &ecc_data[i * FEC_RSM + e->rsn], e->roots,
    162                 e->start + (i + rsb) * e->roots)) {
    163             error("failed to read ecc data: %s", strerror(errno));
    164             return -1;
    165         }
    166 
    167         /* for debugging decoding failures, because decode_rs_char can mangle
    168            ecc_data */
    169         if (unlikely(use_erasures)) {
    170             memcpy(copy, &ecc_data[i * FEC_RSM], FEC_RSM);
    171         }
    172 
    173         /* decode */
    174         int rc = decode_rs_char(rs, &ecc_data[i * FEC_RSM], erasures, neras);
    175 
    176         if (unlikely(rc < 0)) {
    177             if (use_erasures) {
    178                 error("RS block %" PRIu64 ": decoding failed (%d erasures)",
    179                     rsb, neras);
    180                 dump("raw RS block", rsb, copy, FEC_RSM);
    181             } else if (!f->verity.hash) {
    182                 warn("RS block %" PRIu64 ": decoding failed", rsb);
    183             } else {
    184                 debug("RS block %" PRIu64 ": decoding failed", rsb);
    185             }
    186 
    187             errno = EIO;
    188             return -1;
    189         } else if (unlikely(rc > 0)) {
    190             check(rc <= (use_erasures ? e->roots : e->roots / 2));
    191             nerrs += rc;
    192         }
    193 
    194         dest[i] = ecc_data[i * FEC_RSM + data_index];
    195     }
    196 
    197     if (nerrs) {
    198         warn("RS block %" PRIu64 ": corrected %zu errors", rsb, nerrs);
    199         *errors += nerrs;
    200     }
    201 
    202     return FEC_BLOCKSIZE;
    203 }
    204 
    205 /* initializes RS decoder and allocates memory for interleaving */
    206 static int ecc_init(fec_handle *f, rs_unique_ptr& rs,
    207         std::unique_ptr<uint8_t[]>& ecc_data)
    208 {
    209     check(f);
    210 
    211     rs.reset(init_rs_char(FEC_PARAMS(f->ecc.roots)));
    212 
    213     if (unlikely(!rs)) {
    214         error("failed to initialize RS");
    215         errno = ENOMEM;
    216         return -1;
    217     }
    218 
    219     ecc_data.reset(new (std::nothrow) uint8_t[FEC_RSM * FEC_BLOCKSIZE]);
    220 
    221     if (unlikely(!ecc_data)) {
    222         error("failed to allocate ecc buffer");
    223         errno = ENOMEM;
    224         return -1;
    225     }
    226 
    227     return 0;
    228 }
    229 
    230 /* reads `count' bytes from `offset' and corrects possible errors without
    231    erasure detection, returning the number of corrected bytes in `errors' */
    232 static ssize_t ecc_read(fec_handle *f, uint8_t *dest, size_t count,
    233         uint64_t offset, size_t *errors)
    234 {
    235     check(f);
    236     check(dest);
    237     check(offset < f->data_size);
    238     check(offset + count <= f->data_size);
    239     check(errors);
    240 
    241     debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count);
    242 
    243     rs_unique_ptr rs(NULL, free_rs_char);
    244     std::unique_ptr<uint8_t[]> ecc_data;
    245 
    246     if (ecc_init(f, rs, ecc_data) == -1) {
    247         return -1;
    248     }
    249 
    250     uint64_t curr = offset / FEC_BLOCKSIZE;
    251     size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE);
    252     size_t left = count;
    253 
    254     uint8_t data[FEC_BLOCKSIZE];
    255 
    256     while (left > 0) {
    257         /* there's no erasure detection without verity metadata */
    258         if (__ecc_read(f, rs.get(), data, curr * FEC_BLOCKSIZE, false,
    259                 ecc_data.get(), errors) == -1) {
    260             return -1;
    261         }
    262 
    263         size_t copy = FEC_BLOCKSIZE - coff;
    264 
    265         if (copy > left) {
    266             copy = left;
    267         }
    268 
    269         memcpy(dest, &data[coff], copy);
    270 
    271         dest += copy;
    272         left -= copy;
    273         coff = 0;
    274         ++curr;
    275     }
    276 
    277     return count;
    278 }
    279 
    280 /* reads `count' bytes from `offset', corrects possible errors with
    281    erasure detection, and verifies the integrity of read data using
    282    verity hash tree; returns the number of corrections in `errors' */
    283 static ssize_t verity_read(fec_handle *f, uint8_t *dest, size_t count,
    284         uint64_t offset, size_t *errors)
    285 {
    286     check(f);
    287     check(dest);
    288     check(offset < f->data_size);
    289     check(offset + count <= f->data_size);
    290     check(f->verity.hash);
    291     check(errors);
    292 
    293     debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count);
    294 
    295     rs_unique_ptr rs(NULL, free_rs_char);
    296     std::unique_ptr<uint8_t[]> ecc_data;
    297 
    298     if (f->ecc.start && ecc_init(f, rs, ecc_data) == -1) {
    299         return -1;
    300     }
    301 
    302     uint64_t curr = offset / FEC_BLOCKSIZE;
    303     size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE);
    304     size_t left = count;
    305     uint8_t data[FEC_BLOCKSIZE];
    306 
    307     uint64_t max_hash_block = (f->verity.hash_data_blocks * FEC_BLOCKSIZE -
    308                                 SHA256_DIGEST_LENGTH) / SHA256_DIGEST_LENGTH;
    309 
    310     while (left > 0) {
    311         check(curr <= max_hash_block);
    312 
    313         uint8_t *hash = &f->verity.hash[curr * SHA256_DIGEST_LENGTH];
    314         uint64_t curr_offset = curr * FEC_BLOCKSIZE;
    315 
    316         bool expect_zeros = is_zero(f, curr_offset);
    317 
    318         /* if we are in read-only mode and expect to read a zero block,
    319            skip reading and just return zeros */
    320         if (f->mode & O_RDONLY && expect_zeros) {
    321             memset(data, 0, FEC_BLOCKSIZE);
    322             goto valid;
    323         }
    324 
    325         /* copy raw data without error correction */
    326         if (!raw_pread(f, data, FEC_BLOCKSIZE, curr_offset)) {
    327             error("failed to read: %s", strerror(errno));
    328             return -1;
    329         }
    330 
    331         if (likely(verity_check_block(f, hash, data))) {
    332             goto valid;
    333         }
    334 
    335         /* we know the block is supposed to contain zeros, so return zeros
    336            instead of trying to correct it */
    337         if (expect_zeros) {
    338             memset(data, 0, FEC_BLOCKSIZE);
    339             goto corrected;
    340         }
    341 
    342         if (!f->ecc.start) {
    343             /* fatal error without ecc */
    344             error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64,
    345                 offset, offset + count, curr);
    346             return -1;
    347         } else {
    348             debug("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64,
    349                 offset, offset + count, curr);
    350         }
    351 
    352         /* try to correct without erasures first, because checking for
    353            erasure locations is slower */
    354         if (__ecc_read(f, rs.get(), data, curr_offset, false, ecc_data.get(),
    355                 errors) == FEC_BLOCKSIZE &&
    356             verity_check_block(f, hash, data)) {
    357             goto corrected;
    358         }
    359 
    360         /* try to correct with erasures */
    361         if (__ecc_read(f, rs.get(), data, curr_offset, true, ecc_data.get(),
    362                 errors) == FEC_BLOCKSIZE &&
    363             verity_check_block(f, hash, data)) {
    364             goto corrected;
    365         }
    366 
    367         error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64
    368             " (offset %" PRIu64 ") cannot be recovered",
    369             offset, offset + count, curr, curr_offset);
    370         dump("decoded block", curr, data, FEC_BLOCKSIZE);
    371 
    372         errno = EIO;
    373         return -1;
    374 
    375 corrected:
    376         /* update the corrected block to the file if we are in r/w mode */
    377         if (f->mode & O_RDWR &&
    378                 !raw_pwrite(f, data, FEC_BLOCKSIZE, curr_offset)) {
    379             error("failed to write: %s", strerror(errno));
    380             return -1;
    381         }
    382 
    383 valid:
    384         size_t copy = FEC_BLOCKSIZE - coff;
    385 
    386         if (copy > left) {
    387             copy = left;
    388         }
    389 
    390         memcpy(dest, &data[coff], copy);
    391 
    392         dest += copy;
    393         left -= copy;
    394         coff = 0;
    395         ++curr;
    396     }
    397 
    398     return count;
    399 }
    400 
    401 /* sets the internal file position to `offset' relative to `whence' */
    402 int fec_seek(struct fec_handle *f, int64_t offset, int whence)
    403 {
    404     check(f);
    405 
    406     if (whence == SEEK_SET) {
    407         if (offset < 0) {
    408             errno = EOVERFLOW;
    409             return -1;
    410         }
    411 
    412         f->pos = offset;
    413     } else if (whence == SEEK_CUR) {
    414         if (offset < 0 && f->pos < (uint64_t)-offset) {
    415             errno = EOVERFLOW;
    416             return -1;
    417         } else if (offset > 0 && (uint64_t)offset > UINT64_MAX - f->pos) {
    418             errno = EOVERFLOW;
    419             return -1;
    420         }
    421 
    422         f->pos += offset;
    423     } else if (whence == SEEK_END) {
    424         if (offset >= 0) {
    425             errno = ENXIO;
    426             return -1;
    427         } else if ((uint64_t)-offset > f->size) {
    428             errno = EOVERFLOW;
    429             return -1;
    430         }
    431 
    432         f->pos = f->size + offset;
    433     } else {
    434         errno = EINVAL;
    435         return -1;
    436     }
    437 
    438     return 0;
    439 }
    440 
    441 /* reads up to `count' bytes starting from the internal file position using
    442    error correction and integrity validation, if available */
    443 ssize_t fec_read(struct fec_handle *f, void *buf, size_t count)
    444 {
    445     ssize_t rc = fec_pread(f, buf, count, f->pos);
    446 
    447     if (rc > 0) {
    448         check(f->pos < UINT64_MAX - rc);
    449         f->pos += rc;
    450     }
    451 
    452     return rc;
    453 }
    454 
    455 /* for a file with size `max', returns the number of bytes we can read starting
    456    from `offset', up to `count' bytes */
    457 static inline size_t get_max_count(uint64_t offset, size_t count, uint64_t max)
    458 {
    459     if (offset >= max) {
    460         return 0;
    461     } else if (offset > max - count) {
    462         return (size_t)(max - offset);
    463     }
    464 
    465     return count;
    466 }
    467 
    468 /* reads `count' bytes from `f->fd' starting from `offset', and copies the
    469    data to `buf' */
    470 bool raw_pread(fec_handle *f, void *buf, size_t count, uint64_t offset)
    471 {
    472     check(f);
    473     check(buf);
    474 
    475     uint8_t *p = (uint8_t *)buf;
    476     size_t remaining = count;
    477 
    478     while (remaining > 0) {
    479         ssize_t n = TEMP_FAILURE_RETRY(pread64(f->fd, p, remaining, offset));
    480 
    481         if (n <= 0) {
    482             return false;
    483         }
    484 
    485         p += n;
    486         remaining -= n;
    487         offset += n;
    488     }
    489 
    490     return true;
    491 }
    492 
    493 /* writes `count' bytes from `buf' to `f->fd' to a file position `offset' */
    494 bool raw_pwrite(fec_handle *f, const void *buf, size_t count, uint64_t offset)
    495 {
    496     check(f);
    497     check(buf);
    498 
    499     const uint8_t *p = (const uint8_t *)buf;
    500     size_t remaining = count;
    501 
    502     while (remaining > 0) {
    503         ssize_t n = TEMP_FAILURE_RETRY(pwrite64(f->fd, p, remaining, offset));
    504 
    505         if (n <= 0) {
    506             return false;
    507         }
    508 
    509         p += n;
    510         remaining -= n;
    511         offset += n;
    512     }
    513 
    514     return true;
    515 }
    516 
    517 /* reads up to `count' bytes starting from `offset' using error correction and
    518    integrity validation, if available */
    519 ssize_t fec_pread(struct fec_handle *f, void *buf, size_t count,
    520         uint64_t offset)
    521 {
    522     check(f);
    523     check(buf);
    524 
    525     if (unlikely(offset > UINT64_MAX - count)) {
    526         errno = EOVERFLOW;
    527         return -1;
    528     }
    529 
    530     if (f->verity.hash) {
    531         return process(f, (uint8_t *)buf,
    532                     get_max_count(offset, count, f->data_size), offset,
    533                     verity_read);
    534     } else if (f->ecc.start) {
    535         check(f->ecc.start < f->size);
    536 
    537         count = get_max_count(offset, count, f->data_size);
    538         ssize_t rc = process(f, (uint8_t *)buf, count, offset, ecc_read);
    539 
    540         if (rc >= 0) {
    541             return rc;
    542         }
    543 
    544         /* return raw data if pure ecc read fails; due to interleaving
    545            the specific blocks the caller wants may still be fine */
    546     } else {
    547         count = get_max_count(offset, count, f->size);
    548     }
    549 
    550     if (raw_pread(f, buf, count, offset)) {
    551         return count;
    552     }
    553 
    554     return -1;
    555 }
    556