Home | History | Annotate | Download | only in image_type_recognition
      1 // Copyright 2015 Google Inc.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 ////////////////////////////////////////////////////////////////////////////////
     16 //
     17 // This file implements the image type recognition algorithm. Functions, which
     18 // will check each single image type, are implemented based on the comparisons
     19 // of magic numbers or signature strings. Other checks (e.g endianness, general
     20 // tiff magic number "42", etc.) could also be used in some of those functions
     21 // to make the type recognition more stable. Those checks are designed
     22 // according to the format specifications and our own experiments. Notice that
     23 // the magic numbers and signature strings may have different binary values
     24 // according to different endiannesses.
     25 #include "src/image_type_recognition/image_type_recognition_lite.h"
     26 
     27 #include <algorithm>
     28 #include <cassert>
     29 #include <string>
     30 #include <vector>
     31 
     32 #include "src/binary_parse/range_checked_byte_ptr.h"
     33 
     34 namespace piex {
     35 namespace image_type_recognition {
     36 namespace {
     37 
     38 using std::string;
     39 using binary_parse::MemoryStatus;
     40 using binary_parse::RangeCheckedBytePtr;
     41 
     42 // Base class for checking image type. For each image type, one should create an
     43 // inherited class and do the implementation.
     44 class TypeChecker {
     45  public:
     46   // Comparing function, whihc is used for sorting.
     47   static bool Compare(const TypeChecker* a, const TypeChecker* b) {
     48     assert(a);
     49     assert(b);
     50     return a->RequestedSize() < b->RequestedSize();
     51   }
     52 
     53   virtual ~TypeChecker() {}
     54 
     55   // Returns the type of current checker.
     56   virtual RawImageTypes Type() const = 0;
     57 
     58   // Returns the requested data size (in bytes) for current checker. The checker
     59   // guarantees that it will not read more than this size.
     60   virtual size_t RequestedSize() const = 0;
     61 
     62   // Checks if source data belongs to current checker type.
     63   virtual bool IsMyType(const RangeCheckedBytePtr& source) const = 0;
     64 
     65  protected:
     66   // Limits the source length to the RequestedSize(), using it guarantees that
     67   // we will not read more than this size from the source.
     68   RangeCheckedBytePtr LimitSource(const RangeCheckedBytePtr& source) const {
     69     return source.pointerToSubArray(0 /* pos */, RequestedSize());
     70   }
     71 };
     72 
     73 // Check if the uint16 value at (source + offset) is equal to the target value.
     74 bool CheckUInt16Value(const RangeCheckedBytePtr& source,
     75                       const size_t source_offset, const bool use_big_endian,
     76                       const unsigned short target_value) {  // NOLINT
     77   MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS;
     78   const unsigned short value = binary_parse::Get16u(  // NOLINT
     79       source + source_offset, use_big_endian, &status);
     80   if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) {
     81     return false;
     82   }
     83   return (target_value == value);
     84 }
     85 
     86 // Check if the uint32 value at (source + offset) is equal to the target value.
     87 bool CheckUInt32Value(const RangeCheckedBytePtr& source,
     88                       const size_t source_offset, const bool use_big_endian,
     89                       const unsigned int target_value) {
     90   MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS;
     91   const unsigned int value =
     92       binary_parse::Get32u(source + source_offset, use_big_endian, &status);
     93   if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) {
     94     return false;
     95   }
     96   return (target_value == value);
     97 }
     98 
     99 // Determine the endianness. The return value is NOT the endianness indicator,
    100 // it's just that this function was successful.
    101 bool DetermineEndianness(const RangeCheckedBytePtr& source,
    102                          bool* is_big_endian) {
    103   if (source.remainingLength() < 2) {
    104     return false;
    105   }
    106 
    107   if (source[0] == 0x49 && source[1] == 0x49) {
    108     *is_big_endian = false;
    109   } else if (source[0] == 0x4D && source[1] == 0x4D) {
    110     *is_big_endian = true;
    111   } else {
    112     return false;
    113   }
    114   return true;
    115 }
    116 
    117 // Check if signature string can match to the same length string start from
    118 // (source + offset). The signature string will be used as longer magic number
    119 // series.
    120 bool IsSignatureMatched(const RangeCheckedBytePtr& source,
    121                         const size_t source_offset, const string& signature) {
    122   return source.substr(source_offset, signature.size()) == signature;
    123 }
    124 
    125 // Check if signature is found in [source + offset, source + offset + range].
    126 bool IsSignatureFound(const RangeCheckedBytePtr& source,
    127                       const size_t search_offset, const size_t search_range,
    128                       const string& signature, size_t* first_matched) {
    129   if (source.remainingLength() < search_offset + search_range) {
    130     return false;
    131   }
    132 
    133   // The index must be in range [offset, offset + range - sizeof(signature)], so
    134   // that it can guarantee that it will not read outside of range.
    135   for (size_t i = search_offset;
    136        i < search_offset + search_range - signature.size(); ++i) {
    137     if (IsSignatureMatched(source, i, signature)) {
    138       if (first_matched) {
    139         *first_matched = i;
    140       }
    141       return true;
    142     }
    143   }
    144   return false;
    145 }
    146 
    147 // Sony RAW format.
    148 class ArwTypeChecker : public TypeChecker {
    149  public:
    150   virtual RawImageTypes Type() const { return kArwImage; }
    151 
    152   virtual size_t RequestedSize() const { return 5000; }
    153 
    154   // Check multiple points:
    155   // 1. valid endianness at the beginning of the file;
    156   // 2. correct tiff magic number at the (offset == 8) position of the file;
    157   // 3. signature "SONY" in first requested bytes;
    158   // 4. correct signature for (section + version) in first requested bytes.
    159   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    160     RangeCheckedBytePtr limited_source = LimitSource(source);
    161 
    162     bool use_big_endian;
    163     if (!DetermineEndianness(limited_source, &use_big_endian)) {
    164       return false;
    165     }
    166 
    167     const unsigned short kTiffMagic = 0x2A;  // NOLINT
    168     const unsigned int kTiffOffset = 8;
    169     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
    170                           kTiffMagic) ||
    171         !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
    172                           kTiffOffset)) {
    173       return false;
    174     }
    175 
    176     // Search for kSignatureSony in first requested bytes
    177     const string kSignatureSony("SONY");
    178     if (!IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
    179                           kSignatureSony, NULL)) {
    180       return false;
    181     }
    182 
    183     // Search for (kSignatureFileTypeSection + kSignatureVersions[i]) in first
    184     // requested bytes
    185     const string kSignatureSection("\x00\xb0\x01\x00\x04\x00\x00\x00", 8);
    186     const int kSignatureVersionsSize = 5;
    187     const string kSignatureVersions[kSignatureVersionsSize] = {
    188         string("\x02\x00", 2),  // ARW 1.0
    189         string("\x03\x00", 2),  // ARW 2.0
    190         string("\x03\x01", 2),  // ARW 2.1
    191         string("\x03\x02", 2),  // ARW 2.2
    192         string("\x03\x03", 2),  // ARW 2.3
    193     };
    194     bool matched = false;
    195     for (int i = 0; i < kSignatureVersionsSize; ++i) {
    196       matched = matched || IsSignatureFound(
    197                                limited_source, 0 /* offset */, RequestedSize(),
    198                                kSignatureSection + kSignatureVersions[i], NULL);
    199     }
    200     return matched;
    201   }
    202 };
    203 
    204 // Canon RAW (CR2 extension).
    205 class Cr2TypeChecker : public TypeChecker {
    206  public:
    207   virtual RawImageTypes Type() const { return kCr2Image; }
    208 
    209   virtual size_t RequestedSize() const { return 16; }
    210 
    211   // Check multiple points:
    212   // 1. valid endianness at the beginning of the file;
    213   // 2. magic number "42" at the (offset == 2) position of the file;
    214   // 3. signature "CR2" at the (offset == 8) position of the file.
    215   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    216     RangeCheckedBytePtr limited_source = LimitSource(source);
    217 
    218     bool use_big_endian;
    219     if (!DetermineEndianness(limited_source, &use_big_endian)) {
    220       return false;
    221     }
    222 
    223     const unsigned short kTag = 42;  // NOLINT
    224     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
    225                           kTag)) {
    226       return false;
    227     }
    228 
    229     const string kSignature("CR\2\0", 4);
    230     return IsSignatureMatched(limited_source, 8 /* offset */, kSignature);
    231   }
    232 };
    233 
    234 // Canon RAW (CRW extension).
    235 class CrwTypeChecker : public TypeChecker {
    236  public:
    237   virtual RawImageTypes Type() const { return kCrwImage; }
    238 
    239   virtual size_t RequestedSize() const { return 14; }
    240 
    241   // Check only the signature at the (offset == 6) position of the file.
    242   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    243     RangeCheckedBytePtr limited_source = LimitSource(source);
    244 
    245     bool use_big_endian;
    246     if (!DetermineEndianness(limited_source, &use_big_endian)) {
    247       return false;
    248     }
    249 
    250     string signature;
    251     if (use_big_endian) {
    252       signature = string("\x00\x10\xba\xb0\xac\xbb\x00\x02", 8);
    253     } else {
    254       signature = string("HEAPCCDR");
    255     }
    256     return IsSignatureMatched(limited_source, 6 /* offset */, signature);
    257   }
    258 };
    259 
    260 // Kodak RAW.
    261 class DcrTypeChecker : public TypeChecker {
    262  public:
    263   virtual RawImageTypes Type() const { return kDcrImage; }
    264 
    265   virtual size_t RequestedSize() const { return 5000; }
    266 
    267   // Check two different cases, only need to fulfill one of the two:
    268   // 1. signature at the (offset == 16) position of the file;
    269   // 2. two tags (OriginalFileName and FirmwareVersion) can be found in the
    270   // first requested bytes of the file.
    271   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    272     RangeCheckedBytePtr limited_source = LimitSource(source);
    273 
    274     bool use_big_endian;
    275     if (!DetermineEndianness(limited_source, &use_big_endian)) {
    276       return false;
    277     }
    278 
    279     // Case 1: has signature
    280     const string kSignature(
    281         "\x4b\x4f\x44\x41\x4b\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20", 16);
    282     if (IsSignatureMatched(limited_source, 16 /* offset */, kSignature)) {
    283       return true;
    284     }
    285 
    286     // Case 2: search for tags in first requested bytes
    287     string kIfdTags[2];
    288     if (use_big_endian) {
    289       kIfdTags[0] = string("\x03\xe9\x00\x02", 4);  // OriginalFileName
    290       kIfdTags[1] = string("\x0c\xe5\x00\x02", 4);  // FirmwareVersion
    291     } else {
    292       kIfdTags[0] = string("\xe9\x03\x02\x00", 4);  // OriginalFileName
    293       kIfdTags[1] = string("\xe5\x0c\x02\x00", 4);  // FirmwareVersion
    294     }
    295     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
    296                             kIfdTags[0], NULL) &&
    297            IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
    298                             kIfdTags[1], NULL);
    299   }
    300 };
    301 
    302 // Digital Negative RAW.
    303 class DngTypeChecker : public TypeChecker {
    304  public:
    305   virtual RawImageTypes Type() const { return kDngImage; }
    306 
    307   virtual size_t RequestedSize() const { return 1024; }
    308 
    309   // Check multiple points:
    310   // 1. valid endianness at the beginning of the file;
    311   // 2. at least two dng specific tags in the first requested bytes of the
    312   // file
    313   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    314     RangeCheckedBytePtr limited_source = LimitSource(source);
    315 
    316     bool use_big_endian;
    317     if (!DetermineEndianness(limited_source, &use_big_endian)) {
    318       return false;
    319     }
    320 
    321     // Search tags in first requested bytes and verify the order of them.
    322     const int kTagsCount = 5;
    323     string dng_tags[kTagsCount];
    324     if (use_big_endian) {
    325       dng_tags[0] =
    326           string("\xc6\x12\x00\x01\x00\x00\x00\x04", 8);  // tag: 50706
    327       dng_tags[1] =
    328           string("\xc6\x13\x00\x01\x00\x00\x00\x04", 8);  // tag: 50707
    329       dng_tags[2] = string("\xc6\x14\x00\x02", 4);        // tag: 50708
    330       dng_tags[3] = string("\xc6\x20", 2);                // tag: 50720
    331       dng_tags[4] =
    332           string("\xc6\x2d\x00\x04\x00\x00\x00\x01", 8);  // tag: 50733
    333     } else {
    334       dng_tags[0] =
    335           string("\x12\xc6\x01\x00\x04\x00\x00\x00", 8);  // tag: 50706
    336       dng_tags[1] =
    337           string("\x13\xc6\x01\x00\x04\x00\x00\x00", 8);  // tag: 50707
    338       dng_tags[2] = string("\x14\xc6\x02\x00", 4);        // tag: 50708
    339       dng_tags[3] = string("\x20\xc6", 2);                // tag: 50720
    340       dng_tags[4] =
    341           string("\x2d\xc6\x04\x00\x01\x00\x00\x00", 8);  // tag: 50733
    342     }
    343     int tags_found = 0;
    344     for (int i = 0; i < kTagsCount; ++i) {
    345       if (IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
    346                            dng_tags[i], NULL)) {
    347         tags_found++;
    348       }
    349     }
    350     return tags_found >= 2;
    351   }
    352 };
    353 
    354 // Kodak RAW.
    355 class KdcTypeChecker : public TypeChecker {
    356  public:
    357   virtual RawImageTypes Type() const { return kKdcImage; }
    358 
    359   virtual size_t RequestedSize() const { return 5000; }
    360 
    361   // Check two points:
    362   // 1. valid endianness at the beginning of the file;
    363   // 2. two tags (WhiteBalance and SerialNumber) in the first requested bytes.
    364   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    365     RangeCheckedBytePtr limited_source = LimitSource(source);
    366 
    367     bool use_big_endian;
    368     if (!DetermineEndianness(limited_source, &use_big_endian)) {
    369       return false;
    370     }
    371 
    372     // Search in first requested bytes
    373     const size_t kIfdTagsSize = 2;
    374     string kIfdTags[kIfdTagsSize];
    375     if (use_big_endian) {
    376       kIfdTags[0] = string("\xfa\x0d\x00\x01", 4);  // WhiteBalance
    377       kIfdTags[1] = string("\xfa\x00\x00\x02", 4);  // SerialNumber
    378     } else {
    379       kIfdTags[0] = string("\x0d\xfa\x01\x00", 4);  // WhiteBalance
    380       kIfdTags[1] = string("\x00\xfa\x02\x00", 4);  // SerialNumber
    381     }
    382 
    383     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
    384                             kIfdTags[0], NULL) &&
    385            IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
    386                             kIfdTags[1], NULL);
    387   }
    388 };
    389 
    390 // Leaf RAW.
    391 class MosTypeChecker : public TypeChecker {
    392  public:
    393   virtual RawImageTypes Type() const { return kMosImage; }
    394 
    395   virtual size_t RequestedSize() const { return 5000; }
    396 
    397   // Check two points:
    398   // 1. valid endianness at the beginning of the file;
    399   // 2. signature "PKTS    " in the first requested bytes. Note the
    400   // "whitespace". It's important as they are special binary values.
    401   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    402     RangeCheckedBytePtr limited_source = LimitSource(source);
    403 
    404     bool use_big_endian;
    405     if (!DetermineEndianness(source, &use_big_endian)) {
    406       return false;
    407     }
    408 
    409     // Search kSignaturePKTS in first requested bytes
    410     const string kSignaturePKTS("PKTS\x00\x00\x00\x001", 8);
    411     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
    412                             kSignaturePKTS, NULL);
    413   }
    414 };
    415 
    416 // Minolta RAW.
    417 class MrwTypeChecker : public TypeChecker {
    418  public:
    419   virtual RawImageTypes Type() const { return kMrwImage; }
    420 
    421   virtual size_t RequestedSize() const { return 4; }
    422 
    423   // Check only the signature at the beginning of the file.
    424   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    425     // Limits the source length to the RequestedSize(), using it guarantees that
    426     // we will not read more than this size from the source.
    427     RangeCheckedBytePtr limited_source =
    428         source.pointerToSubArray(0 /* pos */, RequestedSize());
    429 
    430     const string kSignature("\0MRM", 4);
    431     return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
    432   }
    433 };
    434 
    435 // Check if the file contains a NRW signature "NRW   " in the first requested
    436 // bytes. Note the "whitespace". It's important as they are special binary
    437 // values.
    438 const size_t kRequestedSizeForNrwSignature = 4000;
    439 bool ContainsNrwSignature(const RangeCheckedBytePtr& source) {
    440   // Search for kSignatureNrw.
    441   const string kSignatureNrw("NRW\x20\x20\x20", 6);
    442   return IsSignatureFound(source, 0 /* offset */, kRequestedSizeForNrwSignature,
    443                           kSignatureNrw, NULL);
    444 }
    445 
    446 // Checks if the file contains the signatures for Nikon formats:
    447 // * the general Nikon singature "NIKON" string.
    448 // * the ReferenceBlackWhite tag.
    449 const size_t kRequestedSizeForNikonSignatures = 4000;
    450 bool ContainsNikonSignatures(const RangeCheckedBytePtr& source,
    451                              const bool use_big_endian) {
    452   const string kSignatureNikon("NIKON");
    453   const string kReferenceBlackWhiteTag = use_big_endian
    454                                              ? string("\x02\x14\x00\x05", 4)
    455                                              : string("\x14\x02\x05\x00", 4);
    456   const std::vector<string> kSignatures = {kSignatureNikon,
    457                                            kReferenceBlackWhiteTag};
    458   for (auto const& signature : kSignatures) {
    459     if (!IsSignatureFound(source, 0, kRequestedSizeForNikonSignatures,
    460                           signature, NULL)) {
    461       return false;
    462     }
    463   }
    464   return true;
    465 }
    466 
    467 // Nikon RAW (NEF extension).
    468 class NefTypeChecker : public TypeChecker {
    469  public:
    470   virtual RawImageTypes Type() const { return kNefImage; }
    471 
    472   virtual size_t RequestedSize() const {
    473     return std::max(kRequestedSizeForNikonSignatures,
    474                     kRequestedSizeForNrwSignature);
    475   }
    476 
    477   // Check multiple points:
    478   // 1. valid endianness at the beginning of the file;
    479   // 2. magic number at the (offset == 2) position of the file;
    480   // 3. the signature "NIKON" in the requested bytes of the file;
    481   // 4. the ReferenceBlackWhite tag in the requested bytes of the file;
    482   // 5. does not contain the NRW signature. We may also check a special
    483   // signature "RAW   " similar to the NRW case, but we got issues in some
    484   // special images that the signature locates in the middle of the file, and it
    485   // costs too  long time to check;
    486   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    487     RangeCheckedBytePtr limited_source = LimitSource(source);
    488 
    489     bool use_big_endian;
    490     if (!DetermineEndianness(limited_source, &use_big_endian)) {
    491       return false;
    492     }
    493 
    494     const unsigned short kTiffMagic = 0x2A;  // NOLINT
    495     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
    496                           kTiffMagic)) {
    497       return false;
    498     }
    499 
    500     return ContainsNikonSignatures(limited_source, use_big_endian) &&
    501            !ContainsNrwSignature(limited_source);  // not NRW
    502   }
    503 };
    504 
    505 // Nikon RAW (NRW extension).
    506 class NrwTypeChecker : public TypeChecker {
    507  public:
    508   virtual RawImageTypes Type() const { return kNrwImage; }
    509 
    510   virtual size_t RequestedSize() const {
    511     return std::max(kRequestedSizeForNikonSignatures,
    512                     kRequestedSizeForNrwSignature);
    513   }
    514 
    515   // Check multiple points:
    516   // 1. valid endianness at the beginning of the file;
    517   // 2. magic numbers at the (offset == 2 and offset == 4) positions of the
    518   // file;
    519   // 3. the signature "NIKON" in the first requested bytes of the file;
    520   // 4. the ReferenceBlackWhite tag in the requested bytes of the file;
    521   // 5. contains the NRW signature;
    522   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    523     RangeCheckedBytePtr limited_source = LimitSource(source);
    524 
    525     bool use_big_endian;
    526     if (!DetermineEndianness(limited_source, &use_big_endian)) {
    527       return false;
    528     }
    529 
    530     const unsigned short kTiffMagic = 0x2A;  // NOLINT
    531     const unsigned int kTiffOffset = 8;
    532     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
    533                           kTiffMagic) ||
    534         !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
    535                           kTiffOffset)) {
    536       return false;
    537     }
    538 
    539     return ContainsNikonSignatures(limited_source, use_big_endian) &&
    540            ContainsNrwSignature(limited_source);
    541   }
    542 };
    543 
    544 // Olympus RAW.
    545 class OrfTypeChecker : public TypeChecker {
    546  public:
    547   virtual RawImageTypes Type() const { return kOrfImage; }
    548 
    549   virtual size_t RequestedSize() const { return 3000; }
    550 
    551   // Check multiple points:
    552   // 1. valid endianness at the beginning of the file;
    553   // 2. tag at the (offset == 2) position of the file;
    554   // 3. signature "OLYMP" in the first requested bytes.
    555   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    556     RangeCheckedBytePtr limited_source = LimitSource(source);
    557 
    558     bool use_big_endian;
    559     if (!DetermineEndianness(limited_source, &use_big_endian)) {
    560       return false;
    561     }
    562 
    563     const size_t kTagSize = 2;
    564     const unsigned short kTag[kTagSize] = {0x4F52, 0x5352};  // NOLINT
    565     if (!(CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
    566                            kTag[0]) ||
    567           CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
    568                            kTag[1]))) {
    569       return false;
    570     }
    571 
    572     // Search for kSignatureOlymp in first requested bytes
    573     const string kSignatureOlymp("OLYMP");
    574     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
    575                             kSignatureOlymp, NULL);
    576   }
    577 };
    578 
    579 // Pentax RAW.
    580 class PefTypeChecker : public TypeChecker {
    581  public:
    582   virtual RawImageTypes Type() const { return kPefImage; }
    583 
    584   virtual size_t RequestedSize() const { return 1280; }
    585 
    586   // Check multiple points:
    587   // 1. valid big endianness at the beginning of the file;
    588   // 2. magic numbers at the (offset == 2 and offset==4) positions of the file;
    589   // 3. signature "AOC   " or "PENTAX  " in first requested bytes.
    590   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    591     RangeCheckedBytePtr limited_source = LimitSource(source);
    592 
    593     bool use_big_endian;
    594     if (!DetermineEndianness(limited_source, &use_big_endian)) {
    595       return false;
    596     }
    597 
    598     const unsigned short kTiffMagic = 0x2A;  // NOLINT
    599     const unsigned int kTiffOffset = 8;
    600     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
    601                           kTiffMagic) ||
    602         !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
    603                           kTiffOffset)) {
    604       return false;
    605     }
    606 
    607     // Search for kSignatureAOC or kSignaturePENTAX in first requested bytes
    608     const string kSignatureAOC("\x41\x4f\x43\x00\x4d\x4d", 6);
    609     const string kSignaturePENTAX("\x50\x45\x4e\x54\x41\x58\x20\x00", 8);
    610     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
    611                             kSignatureAOC, NULL) ||
    612            IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
    613                             kSignaturePENTAX, NULL);
    614   }
    615 };
    616 
    617 // Apple format.
    618 class QtkTypeChecker : public TypeChecker {
    619  public:
    620   virtual RawImageTypes Type() const { return kQtkImage; }
    621 
    622   virtual size_t RequestedSize() const { return 8; }
    623 
    624   // Check only the signature at the beginning of the file.
    625   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    626     RangeCheckedBytePtr limited_source = LimitSource(source);
    627 
    628     const size_t kSignatureSize = 2;
    629     const string kSignature[kSignatureSize] = {
    630         string("qktk\x00\x00\x00\x08", 8), string("qktn\x00\x00\x00\x08", 8),
    631     };
    632     return IsSignatureMatched(limited_source, 0 /* offset */, kSignature[0]) ||
    633            IsSignatureMatched(limited_source, 0 /* offset */, kSignature[1]);
    634   }
    635 };
    636 
    637 // Fuji RAW.
    638 class RafTypeChecker : public TypeChecker {
    639  public:
    640   virtual RawImageTypes Type() const { return kRafImage; }
    641 
    642   virtual size_t RequestedSize() const { return 8; }
    643 
    644   // Check only the signature at the beginning of the file.
    645   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    646     RangeCheckedBytePtr limited_source = LimitSource(source);
    647 
    648     const string kSignature("FUJIFILM");
    649     return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
    650   }
    651 };
    652 
    653 // Contax N RAW.
    654 class RawContaxNTypeChecker : public TypeChecker {
    655  public:
    656   virtual RawImageTypes Type() const { return kRawContaxNImage; }
    657 
    658   virtual size_t RequestedSize() const { return 36; }
    659 
    660   // Check only the signature at the (offset == 25) position of the
    661   // file.
    662   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    663     RangeCheckedBytePtr limited_source = LimitSource(source);
    664 
    665     const string kSignature("ARECOYK");
    666     return IsSignatureMatched(limited_source, 25, kSignature);
    667   }
    668 };
    669 
    670 // Panasonic RAW.
    671 class Rw2TypeChecker : public TypeChecker {
    672  public:
    673   virtual RawImageTypes Type() const { return kRw2Image; }
    674 
    675   virtual size_t RequestedSize() const { return 4; }
    676 
    677   // Check two points: 1. valid endianness at the beginning of the
    678   // file; 2. tag at the (offset == 2) position of the file.
    679   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    680     RangeCheckedBytePtr limited_source = LimitSource(source);
    681 
    682     bool use_big_endian;
    683     if (!DetermineEndianness(source, &use_big_endian)) {
    684       return false;
    685     }
    686 
    687     const unsigned short kTag = 0x55;  // NOLINT
    688     return CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
    689                             kTag);
    690   }
    691 };
    692 
    693 // Samsung RAW.
    694 class SrwTypeChecker : public TypeChecker {
    695  public:
    696   virtual RawImageTypes Type() const { return kSrwImage; }
    697 
    698   virtual size_t RequestedSize() const { return 256; }
    699 
    700   // Check multiple points:
    701   // 1. valid big endianness at the beginning of the file;
    702   // 2. magic numbers at the (offset == 2 and offset==4) positions of the file;
    703   // 3. the signature "SAMSUNG" in the requested bytes of the file;
    704   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    705     RangeCheckedBytePtr limited_source = LimitSource(source);
    706 
    707     bool use_big_endian;
    708     if (!DetermineEndianness(source, &use_big_endian)) {
    709       return false;
    710     }
    711 
    712     const unsigned short kTiffMagic = 0x2A;  // NOLINT
    713     const unsigned int kTiffOffset = 8;
    714     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
    715                           kTiffMagic) ||
    716         !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
    717                           kTiffOffset)) {
    718       return false;
    719     }
    720 
    721     const string kSignature("SAMSUNG");
    722     if (!IsSignatureFound(source, 0, RequestedSize(), kSignature, NULL)) {
    723       return false;
    724     }
    725     return true;
    726   }
    727 };
    728 
    729 // Sigma / Polaroid RAW.
    730 class X3fTypeChecker : public TypeChecker {
    731  public:
    732   virtual RawImageTypes Type() const { return kX3fImage; }
    733 
    734   virtual size_t RequestedSize() const { return 4; }
    735 
    736   // Check only the signature at the beginning of the file.
    737   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
    738     RangeCheckedBytePtr limited_source = LimitSource(source);
    739 
    740     const string kSignature("FOVb", 4);
    741     return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
    742   }
    743 };
    744 
    745 // This class contains the list of all type checkers. One should used this list
    746 // as a whole to execute the image type recognition.
    747 class TypeCheckerList {
    748  public:
    749   TypeCheckerList() {
    750     // Add all supported RAW type checkers here.
    751     checkers_.push_back(new ArwTypeChecker());
    752     checkers_.push_back(new Cr2TypeChecker());
    753     checkers_.push_back(new CrwTypeChecker());
    754     checkers_.push_back(new DcrTypeChecker());
    755     checkers_.push_back(new DngTypeChecker());
    756     checkers_.push_back(new KdcTypeChecker());
    757     checkers_.push_back(new MosTypeChecker());
    758     checkers_.push_back(new MrwTypeChecker());
    759     checkers_.push_back(new NefTypeChecker());
    760     checkers_.push_back(new NrwTypeChecker());
    761     checkers_.push_back(new OrfTypeChecker());
    762     checkers_.push_back(new PefTypeChecker());
    763     checkers_.push_back(new QtkTypeChecker());
    764     checkers_.push_back(new RafTypeChecker());
    765     checkers_.push_back(new RawContaxNTypeChecker());
    766     checkers_.push_back(new Rw2TypeChecker());
    767     checkers_.push_back(new SrwTypeChecker());
    768     checkers_.push_back(new X3fTypeChecker());
    769 
    770     // Sort the checkers by the ascending RequestedSize() to get better
    771     // performance when checking type.
    772     std::sort(checkers_.begin(), checkers_.end(), TypeChecker::Compare);
    773   }
    774 
    775   ~TypeCheckerList() {
    776     for (size_t i = 0; i < checkers_.size(); ++i) {
    777       delete checkers_[i];
    778       checkers_[i] = NULL;
    779     }
    780   }
    781 
    782   // Returns the type of source data. If it can not be identified, returns
    783   // kNonRawImage.
    784   RawImageTypes GetType(const RangeCheckedBytePtr& source) const {
    785     for (size_t i = 0; i < checkers_.size(); ++i) {
    786       if (checkers_[i]->IsMyType(source)) {
    787         return checkers_[i]->Type();
    788       }
    789     }
    790     return kNonRawImage;
    791   }
    792 
    793   // Returns the maximum size of requested size of data for identifying image
    794   // type using this class. The class guarantees that it will not read more than
    795   // this size.
    796   size_t RequestedSize() const {
    797     assert(!checkers_.empty());
    798     // The checkers_ is ascending sorted. The last element is the maximum.
    799     return checkers_.back()->RequestedSize();
    800   }
    801 
    802   bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) {
    803     const TypeChecker* type_checker = GetTypeCheckerForType(type);
    804     if (type_checker) {
    805       return type_checker->IsMyType(source);
    806     } else {
    807       return false;
    808     }
    809   }
    810 
    811   size_t RequestedSizeForType(const RawImageTypes type) {
    812     const TypeChecker* type_checker = GetTypeCheckerForType(type);
    813     if (type_checker) {
    814       return type_checker->RequestedSize();
    815     } else {
    816       return 0;
    817     }
    818   }
    819 
    820  private:
    821   const TypeChecker* GetTypeCheckerForType(const RawImageTypes type) {
    822     for (const auto* type_checker : checkers_) {
    823       if (type_checker->Type() == type) {
    824         return type_checker;
    825       }
    826     }
    827     return nullptr;
    828   }
    829 
    830   std::vector<TypeChecker*> checkers_;
    831 };
    832 
    833 }  // namespace
    834 
    835 bool IsRaw(const RawImageTypes type) {
    836   switch (type) {
    837     // Non-RAW-image type
    838     case kNonRawImage: {
    839       return false;
    840     }
    841 
    842     // Raw image types
    843     case kArwImage:
    844     case kCr2Image:
    845     case kCrwImage:
    846     case kDcrImage:
    847     case kDngImage:
    848     case kKdcImage:
    849     case kMosImage:
    850     case kMrwImage:
    851     case kNefImage:
    852     case kNrwImage:
    853     case kOrfImage:
    854     case kPefImage:
    855     case kQtkImage:
    856     case kRafImage:
    857     case kRawContaxNImage:
    858     case kRw2Image:
    859     case kSrwImage:
    860     case kX3fImage: {
    861       return true;
    862     }
    863 
    864     default: {
    865       // Unsupported type!
    866       assert(false);
    867     }
    868   }
    869   return false;
    870 }
    871 
    872 bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) {
    873   return TypeCheckerList().IsOfType(source, type);
    874 }
    875 
    876 RawImageTypes RecognizeRawImageTypeLite(const RangeCheckedBytePtr& source) {
    877   return TypeCheckerList().GetType(source);
    878 }
    879 
    880 size_t GetNumberOfBytesForIsRawLite() {
    881   return TypeCheckerList().RequestedSize();
    882 }
    883 
    884 size_t GetNumberOfBytesForIsOfType(const RawImageTypes type) {
    885   return TypeCheckerList().RequestedSizeForType(type);
    886 }
    887 
    888 bool IsRawLite(const RangeCheckedBytePtr& source) {
    889   return IsRaw(RecognizeRawImageTypeLite(source));
    890 }
    891 
    892 }  // namespace image_type_recognition
    893 }  // namespace piex
    894