Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2015, OpenCV Foundation, all rights reserved.
     14 // Third party copyrights are property of their respective owners.
     15 //
     16 // Redistribution and use in source and binary forms, with or without modification,
     17 // are permitted provided that the following conditions are met:
     18 //
     19 //   * Redistribution's of source code must retain the above copyright notice,
     20 //     this list of conditions and the following disclaimer.
     21 //
     22 //   * Redistribution's in binary form must reproduce the above copyright notice,
     23 //     this list of conditions and the following disclaimer in the documentation
     24 //     and/or other materials provided with the distribution.
     25 //
     26 //   * The name of Intel Corporation may not be used to endorse or promote products
     27 //     derived from this software without specific prior written permission.
     28 //
     29 // This software is provided by the copyright holders and contributors "as is" and
     30 // any express or implied warranties, including, but not limited to, the implied
     31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     32 // In no event shall the Intel Corporation or contributors be liable for any direct,
     33 // indirect, incidental, special, exemplary, or consequential damages
     34 // (including, but not limited to, procurement of substitute goods or services;
     35 // loss of use, data, or profits; or business interruption) however caused
     36 // and on any theory of liability, whether in contract, strict liability,
     37 // or tort (including negligence or otherwise) arising in any way out of
     38 // the use of this software, even if advised of the possibility of such damage.
     39 //
     40 //M*/
     41 
     42 #include "precomp.hpp"
     43 #include <vector>
     44 
     45 #if CV_NEON
     46 #define WITH_NEON
     47 #endif
     48 
     49 namespace cv
     50 {
     51 namespace mjpeg
     52 {
     53 
     54 enum { COLORSPACE_GRAY=0, COLORSPACE_RGBA=1, COLORSPACE_BGR=2, COLORSPACE_YUV444P=3 };
     55 
     56 #define fourCC(a,b,c,d)   ((int)((uchar(d)<<24) | (uchar(c)<<16) | (uchar(b)<<8) | uchar(a)))
     57 
     58 static const int AVIH_STRH_SIZE = 56;
     59 static const int STRF_SIZE = 40;
     60 static const int AVI_DWFLAG = 0x00000910;
     61 static const int AVI_DWSCALE = 1;
     62 static const int AVI_DWQUALITY = -1;
     63 static const int JUNK_SEEK = 4096;
     64 static const int AVIIF_KEYFRAME = 0x10;
     65 static const int MAX_BYTES_PER_SEC = 99999999;
     66 static const int SUG_BUFFER_SIZE = 1048576;
     67 
     68 static const unsigned bit_mask[] =
     69 {
     70     0,
     71     0x00000001, 0x00000003, 0x00000007, 0x0000000F,
     72     0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
     73     0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
     74     0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
     75     0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
     76     0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
     77     0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
     78     0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
     79 };
     80 
     81 class BitStream
     82 {
     83 public:
     84     enum
     85     {
     86         DEFAULT_BLOCK_SIZE = (1 << 15),
     87         huff_val_shift = 20,
     88         huff_code_mask = (1 << huff_val_shift) - 1
     89     };
     90 
     91     BitStream()
     92     {
     93         m_buf.resize(DEFAULT_BLOCK_SIZE + 1024);
     94         m_start = &m_buf[0];
     95         m_end = m_start + DEFAULT_BLOCK_SIZE;
     96         m_is_opened = false;
     97         m_f = 0;
     98     }
     99 
    100     ~BitStream()
    101     {
    102         close();
    103     }
    104 
    105     bool open(const String& filename)
    106     {
    107         close();
    108         m_f = fopen(filename.c_str(), "wb");
    109         if( !m_f )
    110             return false;
    111         m_current = m_start;
    112         m_pos = 0;
    113         return true;
    114     }
    115 
    116     bool isOpened() const { return m_f != 0; }
    117 
    118     void close()
    119     {
    120         writeBlock();
    121         if( m_f )
    122             fclose(m_f);
    123         m_f = 0;
    124     }
    125 
    126     void writeBlock()
    127     {
    128         size_t wsz0 = m_current - m_start;
    129         if( wsz0 > 0 && m_f )
    130         {
    131             size_t wsz = fwrite(m_start, 1, wsz0, m_f);
    132             CV_Assert( wsz == wsz0 );
    133         }
    134         m_pos += wsz0;
    135         m_current = m_start;
    136     }
    137 
    138     size_t getPos() const
    139     {
    140         return (size_t)(m_current - m_start) + m_pos;
    141     }
    142 
    143     void putByte(int val)
    144     {
    145         *m_current++ = (uchar)val;
    146         if( m_current >= m_end )
    147             writeBlock();
    148     }
    149 
    150     void putBytes(const uchar* buf, int count)
    151     {
    152         uchar* data = (uchar*)buf;
    153         CV_Assert(m_f && data && m_current && count >= 0);
    154         if( m_current >= m_end )
    155             writeBlock();
    156 
    157         while( count )
    158         {
    159             int l = (int)(m_end - m_current);
    160 
    161             if (l > count)
    162                 l = count;
    163 
    164             if( l > 0 )
    165             {
    166                 memcpy(m_current, data, l);
    167                 m_current += l;
    168                 data += l;
    169                 count -= l;
    170             }
    171             if( m_current >= m_end )
    172                 writeBlock();
    173         }
    174     }
    175 
    176     void putShort(int val)
    177     {
    178         m_current[0] = (uchar)val;
    179         m_current[1] = (uchar)(val >> 8);
    180         m_current += 2;
    181         if( m_current >= m_end )
    182             writeBlock();
    183     }
    184 
    185     void putInt(int val)
    186     {
    187         m_current[0] = (uchar)val;
    188         m_current[1] = (uchar)(val >> 8);
    189         m_current[2] = (uchar)(val >> 16);
    190         m_current[3] = (uchar)(val >> 24);
    191         m_current += 4;
    192         if( m_current >= m_end )
    193             writeBlock();
    194     }
    195 
    196     void jputShort(int val)
    197     {
    198         m_current[0] = (uchar)(val >> 8);
    199         m_current[1] = (uchar)val;
    200         m_current += 2;
    201         if( m_current >= m_end )
    202             writeBlock();
    203     }
    204 
    205     void patchInt(int val, size_t pos)
    206     {
    207         if( pos >= m_pos )
    208         {
    209             ptrdiff_t delta = pos - m_pos;
    210             CV_Assert( delta < m_current - m_start );
    211             m_start[delta] = (uchar)val;
    212             m_start[delta+1] = (uchar)(val >> 8);
    213             m_start[delta+2] = (uchar)(val >> 16);
    214             m_start[delta+3] = (uchar)(val >> 24);
    215         }
    216         else
    217         {
    218             long fpos = ftell(m_f);
    219             fseek(m_f, (long)pos, SEEK_SET);
    220             uchar buf[] = { (uchar)val, (uchar)(val >> 8), (uchar)(val >> 16), (uchar)(val >> 24) };
    221             fwrite(buf, 1, 4, m_f);
    222             fseek(m_f, fpos, SEEK_SET);
    223         }
    224     }
    225 
    226     void jput(unsigned currval)
    227     {
    228         uchar v;
    229         uchar* ptr = m_current;
    230         v = (uchar)(currval >> 24);
    231         *ptr++ = v;
    232         if( v == 255 )
    233             *ptr++ = 0;
    234         v = (uchar)(currval >> 16);
    235         *ptr++ = v;
    236         if( v == 255 )
    237             *ptr++ = 0;
    238         v = (uchar)(currval >> 8);
    239         *ptr++ = v;
    240         if( v == 255 )
    241             *ptr++ = 0;
    242         v = (uchar)currval;
    243         *ptr++ = v;
    244         if( v == 255 )
    245             *ptr++ = 0;
    246         m_current = ptr;
    247         if( m_current >= m_end )
    248             writeBlock();
    249     }
    250 
    251     void jflush(unsigned currval, int bitIdx)
    252     {
    253         uchar v;
    254         uchar* ptr = m_current;
    255         currval |= (1 << bitIdx)-1;
    256         while( bitIdx < 32 )
    257         {
    258             v = (uchar)(currval >> 24);
    259             *ptr++ = v;
    260             if( v == 255 )
    261                 *ptr++ = 0;
    262             currval <<= 8;
    263             bitIdx += 8;
    264         }
    265         m_current = ptr;
    266         if( m_current >= m_end )
    267             writeBlock();
    268     }
    269 
    270     static bool createEncodeHuffmanTable( const int* src, unsigned* table, int max_size )
    271     {
    272         int  i, k;
    273         int  min_val = INT_MAX, max_val = INT_MIN;
    274         int  size;
    275 
    276         /* calc min and max values in the table */
    277         for( i = 1, k = 1; src[k] >= 0; i++ )
    278         {
    279             int code_count = src[k++];
    280 
    281             for( code_count += k; k < code_count; k++ )
    282             {
    283                 int  val = src[k] >> huff_val_shift;
    284                 if( val < min_val )
    285                     min_val = val;
    286                 if( val > max_val )
    287                     max_val = val;
    288             }
    289         }
    290 
    291         size = max_val - min_val + 3;
    292 
    293         if( size > max_size )
    294         {
    295             CV_Error(CV_StsOutOfRange, "too big maximum Huffman code size");
    296             return false;
    297         }
    298 
    299         memset( table, 0, size*sizeof(table[0]));
    300 
    301         table[0] = min_val;
    302         table[1] = size - 2;
    303 
    304         for( i = 1, k = 1; src[k] >= 0; i++ )
    305         {
    306             int code_count = src[k++];
    307 
    308             for( code_count += k; k < code_count; k++ )
    309             {
    310                 int  val = src[k] >> huff_val_shift;
    311                 int  code = src[k] & huff_code_mask;
    312 
    313                 table[val - min_val + 2] = (code << 8) | i;
    314             }
    315         }
    316         return true;
    317     }
    318 
    319     static int* createSourceHuffmanTable(const uchar* src, int* dst,
    320                                          int max_bits, int first_bits)
    321     {
    322         int   i, val_idx, code = 0;
    323         int*  table = dst;
    324         *dst++ = first_bits;
    325         for (i = 1, val_idx = max_bits; i <= max_bits; i++)
    326         {
    327             int code_count = src[i - 1];
    328             dst[0] = code_count;
    329             code <<= 1;
    330             for (int k = 0; k < code_count; k++)
    331             {
    332                 dst[k + 1] = (src[val_idx + k] << huff_val_shift) | (code + k);
    333             }
    334             code += code_count;
    335             dst += code_count + 1;
    336             val_idx += code_count;
    337         }
    338         dst[0] = -1;
    339         return  table;
    340     }
    341 
    342 protected:
    343     std::vector<uchar> m_buf;
    344     uchar*  m_start;
    345     uchar*  m_end;
    346     uchar*  m_current;
    347     size_t  m_pos;
    348     bool    m_is_opened;
    349     FILE*   m_f;
    350 };
    351 
    352 
    353 class MotionJpegWriter : public IVideoWriter
    354 {
    355 public:
    356     MotionJpegWriter() { rawstream = false; }
    357     MotionJpegWriter(const String& filename, double fps, Size size, bool iscolor)
    358     {
    359         rawstream = false;
    360         open(filename, fps, size, iscolor);
    361     }
    362     ~MotionJpegWriter() { close(); }
    363 
    364     void close()
    365     {
    366         if( !strm.isOpened() )
    367             return;
    368 
    369         if( !frameOffset.empty() && !rawstream )
    370         {
    371             endWriteChunk(); // end LIST 'movi'
    372             writeIndex();
    373             finishWriteAVI();
    374         }
    375         strm.close();
    376         frameOffset.clear();
    377         frameSize.clear();
    378         AVIChunkSizeIndex.clear();
    379         frameNumIndexes.clear();
    380     }
    381 
    382     bool open(const String& filename, double fps, Size size, bool iscolor)
    383     {
    384         close();
    385 
    386         if( filename.empty() )
    387             return false;
    388         const char* ext = strrchr(filename.c_str(), '.');
    389         if( !ext )
    390             return false;
    391         if( strcmp(ext, ".avi") != 0 && strcmp(ext, ".AVI") != 0 && strcmp(ext, ".Avi") != 0 )
    392             return false;
    393 
    394         bool ok = strm.open(filename);
    395         if( !ok )
    396             return false;
    397 
    398         CV_Assert(fps >= 1);
    399         outfps = cvRound(fps);
    400         width = size.width;
    401         height = size.height;
    402         quality = 75;
    403         rawstream = false;
    404         channels = iscolor ? 3 : 1;
    405 
    406         if( !rawstream )
    407         {
    408             startWriteAVI();
    409             writeStreamHeader();
    410         }
    411         //printf("motion jpeg stream %s has been successfully opened\n", filename.c_str());
    412         return true;
    413     }
    414 
    415     bool isOpened() const { return strm.isOpened(); }
    416 
    417     void startWriteAVI()
    418     {
    419         startWriteChunk(fourCC('R', 'I', 'F', 'F'));
    420 
    421         strm.putInt(fourCC('A', 'V', 'I', ' '));
    422 
    423         startWriteChunk(fourCC('L', 'I', 'S', 'T'));
    424 
    425         strm.putInt(fourCC('h', 'd', 'r', 'l'));
    426         strm.putInt(fourCC('a', 'v', 'i', 'h'));
    427         strm.putInt(AVIH_STRH_SIZE);
    428         strm.putInt(cvRound(1e6 / outfps));
    429         strm.putInt(MAX_BYTES_PER_SEC);
    430         strm.putInt(0);
    431         strm.putInt(AVI_DWFLAG);
    432 
    433         frameNumIndexes.push_back(strm.getPos());
    434 
    435         strm.putInt(0);
    436         strm.putInt(0);
    437         strm.putInt(1); // number of streams
    438         strm.putInt(SUG_BUFFER_SIZE);
    439         strm.putInt(width);
    440         strm.putInt(height);
    441         strm.putInt(0);
    442         strm.putInt(0);
    443         strm.putInt(0);
    444         strm.putInt(0);
    445     }
    446 
    447     void writeStreamHeader()
    448     {
    449         // strh
    450         startWriteChunk(fourCC('L', 'I', 'S', 'T'));
    451 
    452         strm.putInt(fourCC('s', 't', 'r', 'l'));
    453         strm.putInt(fourCC('s', 't', 'r', 'h'));
    454         strm.putInt(AVIH_STRH_SIZE);
    455         strm.putInt(fourCC('v', 'i', 'd', 's'));
    456         strm.putInt(fourCC('M', 'J', 'P', 'G'));
    457         strm.putInt(0);
    458         strm.putInt(0);
    459         strm.putInt(0);
    460         strm.putInt(AVI_DWSCALE);
    461         strm.putInt(outfps);
    462         strm.putInt(0);
    463 
    464         frameNumIndexes.push_back(strm.getPos());
    465 
    466         strm.putInt(0);
    467         strm.putInt(SUG_BUFFER_SIZE);
    468         strm.putInt(AVI_DWQUALITY);
    469         strm.putInt(0);
    470         strm.putShort(0);
    471         strm.putShort(0);
    472         strm.putShort(width);
    473         strm.putShort(height);
    474 
    475         // strf (use the BITMAPINFOHEADER for video)
    476         startWriteChunk(fourCC('s', 't', 'r', 'f'));
    477 
    478         strm.putInt(STRF_SIZE);
    479         strm.putInt(width);
    480         strm.putInt(height);
    481         strm.putShort(1); // planes (1 means interleaved data (after decompression))
    482 
    483         strm.putShort(channels); // bits per pixel
    484         strm.putInt(fourCC('M', 'J', 'P', 'G'));
    485         strm.putInt(width * height * channels);
    486         strm.putInt(0);
    487         strm.putInt(0);
    488         strm.putInt(0);
    489         strm.putInt(0);
    490         // Must be indx chunk
    491         endWriteChunk(); // end strf
    492         endWriteChunk(); // end strl
    493 
    494         // odml
    495         startWriteChunk(fourCC('L', 'I', 'S', 'T'));
    496         strm.putInt(fourCC('o', 'd', 'm', 'l'));
    497         startWriteChunk(fourCC('d', 'm', 'l', 'h'));
    498 
    499         frameNumIndexes.push_back(strm.getPos());
    500 
    501         strm.putInt(0);
    502         strm.putInt(0);
    503 
    504         endWriteChunk(); // end dmlh
    505         endWriteChunk(); // end odml
    506 
    507         endWriteChunk(); // end hdrl
    508 
    509         // JUNK
    510         startWriteChunk(fourCC('J', 'U', 'N', 'K'));
    511         size_t pos = strm.getPos();
    512         for( ; pos < (size_t)JUNK_SEEK; pos += 4 )
    513             strm.putInt(0);
    514         endWriteChunk(); // end JUNK
    515         // movi
    516         startWriteChunk(fourCC('L', 'I', 'S', 'T'));
    517         moviPointer = strm.getPos();
    518         strm.putInt(fourCC('m', 'o', 'v', 'i'));
    519     }
    520 
    521     void startWriteChunk(int fourcc)
    522     {
    523         CV_Assert(fourcc != 0);
    524         strm.putInt(fourcc);
    525 
    526         AVIChunkSizeIndex.push_back(strm.getPos());
    527         strm.putInt(0);
    528     }
    529 
    530     void endWriteChunk()
    531     {
    532         if( !AVIChunkSizeIndex.empty() )
    533         {
    534             size_t currpos = strm.getPos();
    535             size_t pospos = AVIChunkSizeIndex.back();
    536             AVIChunkSizeIndex.pop_back();
    537             int chunksz = (int)(currpos - (pospos + 4));
    538             strm.patchInt(chunksz, pospos);
    539         }
    540     }
    541 
    542     void writeIndex()
    543     {
    544         // old style AVI index. Must be Open-DML index
    545         startWriteChunk(fourCC('i', 'd', 'x', '1'));
    546         int nframes = (int)frameOffset.size();
    547         for( int i = 0; i < nframes; i++ )
    548         {
    549             strm.putInt(fourCC('0', '0', 'd', 'c'));
    550             strm.putInt(AVIIF_KEYFRAME);
    551             strm.putInt((int)frameOffset[i]);
    552             strm.putInt((int)frameSize[i]);
    553         }
    554         endWriteChunk(); // End idx1
    555     }
    556 
    557     void finishWriteAVI()
    558     {
    559         int nframes = (int)frameOffset.size();
    560         // Record frames numbers to AVI Header
    561         while (!frameNumIndexes.empty())
    562         {
    563             size_t ppos = frameNumIndexes.back();
    564             frameNumIndexes.pop_back();
    565             strm.patchInt(nframes, ppos);
    566         }
    567         endWriteChunk(); // end RIFF
    568     }
    569 
    570     void write(InputArray _img)
    571     {
    572         Mat img = _img.getMat();
    573         size_t chunkPointer = strm.getPos();
    574         int input_channels = img.channels();
    575         int colorspace = -1;
    576 
    577         if( input_channels == 1 && channels == 1 )
    578         {
    579             CV_Assert( img.cols == width && img.rows == height );
    580             colorspace = COLORSPACE_GRAY;
    581         }
    582         else if( input_channels == 4 )
    583         {
    584             CV_Assert( img.cols == width && img.rows == height && channels == 3 );
    585             colorspace = COLORSPACE_RGBA;
    586         }
    587         else if( input_channels == 3 )
    588         {
    589             CV_Assert( img.cols == width && img.rows == height && channels == 3 );
    590             colorspace = COLORSPACE_BGR;
    591         }
    592         else if( input_channels == 1 && channels == 3 )
    593         {
    594             CV_Assert( img.cols == width && img.rows == height*3 );
    595             colorspace = COLORSPACE_YUV444P;
    596         }
    597         else
    598             CV_Error(CV_StsBadArg, "Invalid combination of specified video colorspace and the input image colorspace");
    599 
    600         if( !rawstream )
    601             startWriteChunk(fourCC('0', '0', 'd', 'c'));
    602 
    603         writeFrameData(img.data, (int)img.step, colorspace, input_channels);
    604 
    605         if( !rawstream )
    606         {
    607             frameOffset.push_back(chunkPointer - moviPointer);
    608             frameSize.push_back(strm.getPos() - chunkPointer - 8);       // Size excludes '00dc' and size field
    609             endWriteChunk(); // end '00dc'
    610         }
    611     }
    612 
    613     double getProperty(int propId) const
    614     {
    615         if( propId == VIDEOWRITER_PROP_QUALITY )
    616             return quality;
    617         if( propId == VIDEOWRITER_PROP_FRAMEBYTES )
    618             return frameSize.empty() ? 0. : (double)frameSize.back();
    619         return 0.;
    620     }
    621 
    622     bool setProperty(int propId, double value)
    623     {
    624         if( propId == VIDEOWRITER_PROP_QUALITY )
    625         {
    626             quality = value;
    627             return true;
    628         }
    629         return false;
    630     }
    631 
    632     void writeFrameData( const uchar* data, int step, int colorspace, int input_channels );
    633 
    634 protected:
    635     int outfps;
    636     int width, height, channels;
    637     double quality;
    638     size_t moviPointer;
    639     std::vector<size_t> frameOffset, frameSize, AVIChunkSizeIndex, frameNumIndexes;
    640     bool rawstream;
    641 
    642     BitStream strm;
    643 };
    644 
    645 #define DCT_DESCALE(x, n) (((x) + (((int)1) << ((n) - 1))) >> (n))
    646 #define fix(x, n)   (int)((x)*(1 << (n)) + .5);
    647 
    648 enum
    649 {
    650     fixb = 14,
    651     fixc = 12,
    652     postshift = 14
    653 };
    654 
    655 static const int C0_707 = fix(0.707106781f, fixb);
    656 static const int C0_541 = fix(0.541196100f, fixb);
    657 static const int C0_382 = fix(0.382683432f, fixb);
    658 static const int C1_306 = fix(1.306562965f, fixb);
    659 
    660 static const int y_r = fix(0.299, fixc);
    661 static const int y_g = fix(0.587, fixc);
    662 static const int y_b = fix(0.114, fixc);
    663 
    664 static const int cb_r = -fix(0.1687, fixc);
    665 static const int cb_g = -fix(0.3313, fixc);
    666 static const int cb_b = fix(0.5, fixc);
    667 
    668 static const int cr_r = fix(0.5, fixc);
    669 static const int cr_g = -fix(0.4187, fixc);
    670 static const int cr_b = -fix(0.0813, fixc);
    671 
    672 // Standard JPEG quantization tables
    673 static const uchar jpegTableK1_T[] =
    674 {
    675     16, 12, 14, 14,  18,  24,  49,  72,
    676     11, 12, 13, 17,  22,  35,  64,  92,
    677     10, 14, 16, 22,  37,  55,  78,  95,
    678     16, 19, 24, 29,  56,  64,  87,  98,
    679     24, 26, 40, 51,  68,  81, 103, 112,
    680     40, 58, 57, 87, 109, 104, 121, 100,
    681     51, 60, 69, 80, 103, 113, 120, 103,
    682     61, 55, 56, 62,  77,  92, 101,  99
    683 };
    684 
    685 static const uchar jpegTableK2_T[] =
    686 {
    687     17, 18, 24, 47, 99, 99, 99, 99,
    688     18, 21, 26, 66, 99, 99, 99, 99,
    689     24, 26, 56, 99, 99, 99, 99, 99,
    690     47, 66, 99, 99, 99, 99, 99, 99,
    691     99, 99, 99, 99, 99, 99, 99, 99,
    692     99, 99, 99, 99, 99, 99, 99, 99,
    693     99, 99, 99, 99, 99, 99, 99, 99,
    694     99, 99, 99, 99, 99, 99, 99, 99
    695 };
    696 
    697 // Standard Huffman tables
    698 
    699 // ... for luma DCs.
    700 static const uchar jpegTableK3[] =
    701 {
    702     0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    703     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
    704 };
    705 
    706 // ... for chroma DCs.
    707 static const uchar jpegTableK4[] =
    708 {
    709     0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    710     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
    711 };
    712 
    713 // ... for luma ACs.
    714 static const uchar jpegTableK5[] =
    715 {
    716     0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125,
    717     0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
    718     0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
    719     0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
    720     0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
    721     0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
    722     0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
    723     0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    724     0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
    725     0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    726     0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
    727     0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
    728     0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
    729     0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    730     0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    731     0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
    732     0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
    733     0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
    734     0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
    735     0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
    736     0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    737     0xf9, 0xfa
    738 };
    739 
    740 // ... for chroma ACs
    741 static const uchar jpegTableK6[] =
    742 {
    743     0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119,
    744     0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
    745     0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
    746     0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
    747     0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
    748     0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
    749     0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
    750     0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
    751     0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
    752     0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    753     0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
    754     0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    755     0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    756     0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
    757     0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
    758     0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
    759     0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
    760     0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
    761     0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
    762     0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
    763     0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    764     0xf9, 0xfa
    765 };
    766 
    767 static const uchar zigzag[] =
    768 {
    769     0,  8,  1,  2,  9, 16, 24, 17, 10,  3,  4, 11, 18, 25, 32, 40,
    770     33, 26, 19, 12,  5,  6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
    771     28, 21, 14,  7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
    772     23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63,
    773     63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63
    774 };
    775 
    776 
    777 static const int idct_prescale[] =
    778 {
    779     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    780     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    781     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    782     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    783     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    784     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    785     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    786     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
    787 };
    788 
    789 static const char jpegHeader[] =
    790 "\xFF\xD8"  // SOI  - start of image
    791 "\xFF\xE0"  // APP0 - jfif extention
    792 "\x00\x10"  // 2 bytes: length of APP0 segment
    793 "JFIF\x00"  // JFIF signature
    794 "\x01\x02"  // version of JFIF
    795 "\x00"      // units = pixels ( 1 - inch, 2 - cm )
    796 "\x00\x01\x00\x01" // 2 2-bytes values: x density & y density
    797 "\x00\x00"; // width & height of thumbnail: ( 0x0 means no thumbnail)
    798 
    799 #ifdef WITH_NEON
    800 // FDCT with postscaling
    801 static void aan_fdct8x8( const short *src, short *dst,
    802                         int step, const short *postscale )
    803 {
    804     // Pass 1: process rows
    805     int16x8_t x0 = vld1q_s16(src);    int16x8_t x1 = vld1q_s16(src + step*7);
    806     int16x8_t x2 = vld1q_s16(src + step*3);    int16x8_t x3 = vld1q_s16(src + step*4);
    807 
    808     int16x8_t x4 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
    809     x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
    810 
    811     int16x8_t t1 = x0; int16x8_t t2 = x2;
    812 
    813     x2 = vaddq_s16(x4, x1);    x4 = vsubq_s16(x4, x1);
    814 
    815     x0 = vld1q_s16(src + step);    x3 = vld1q_s16(src + step*6);
    816 
    817     x1 = vaddq_s16(x0, x3);    x0 = vsubq_s16(x0, x3);
    818     int16x8_t t3 = x0;
    819 
    820     x0 = vld1q_s16(src + step*2);    x3 = vld1q_s16(src + step*5);
    821 
    822     int16x8_t t4 = vsubq_s16(x0, x3);
    823 
    824     x0 = vaddq_s16(x0, x3);
    825     x3 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
    826     x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
    827 
    828     int16x8_t res0 = x1;
    829     int16x8_t res4 = x2;
    830     x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));
    831     x1 = vaddq_s16(x4, x0);    x4 = vsubq_s16(x4, x0);
    832 
    833     int16x8_t res2 = x4;
    834     int16x8_t res6 = x1;
    835 
    836     x0 = t2;    x1 = t4;
    837     x2 = t3;    x3 = t1;
    838     x0 = vaddq_s16(x0, x1);    x1 = vaddq_s16(x1, x2);    x2 = vaddq_s16(x2, x3);
    839     x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));
    840 
    841     x4 = vaddq_s16(x1, x3);    x3 = vsubq_s16(x3, x1);
    842     x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
    843     x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
    844     x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);
    845 
    846     x1 = vaddq_s16(x0, x3);    x3 = vsubq_s16(x3, x0);
    847     x0 = vaddq_s16(x4, x2);    x4 = vsubq_s16(x4, x2);
    848 
    849     int16x8_t res1 = x0;
    850     int16x8_t res3 = x3;
    851     int16x8_t res5 = x1;
    852     int16x8_t res7 = x4;
    853 
    854     //transpose a matrix
    855     /*
    856      res0 00 01 02 03 04 05 06 07
    857      res1 10 11 12 13 14 15 16 17
    858      res2 20 21 22 23 24 25 26 27
    859      res3 30 31 32 33 34 35 36 37
    860      res4 40 41 42 43 44 45 46 47
    861      res5 50 51 52 53 54 55 56 57
    862      res6 60 61 62 63 64 65 66 67
    863      res7 70 71 72 73 74 75 76 77
    864      */
    865 
    866     //transpose elements 00-33
    867     int16x4_t res0_0 = vget_low_s16(res0);
    868     int16x4_t res1_0 = vget_low_s16(res1);
    869     int16x4x2_t tres = vtrn_s16(res0_0, res1_0);
    870     int32x4_t l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
    871 
    872     res0_0 = vget_low_s16(res2);
    873     res1_0 = vget_low_s16(res3);
    874     tres = vtrn_s16(res0_0, res1_0);
    875     int32x4_t l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
    876 
    877     int32x4x2_t tres1 = vtrnq_s32(l0, l1);
    878 
    879     // transpose elements 40-73
    880     res0_0 = vget_low_s16(res4);
    881     res1_0 = vget_low_s16(res5);
    882     tres = vtrn_s16(res0_0, res1_0);
    883     l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
    884 
    885     res0_0 = vget_low_s16(res6);
    886     res1_0 = vget_low_s16(res7);
    887 
    888     tres = vtrn_s16(res0_0, res1_0);
    889     l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
    890 
    891     int32x4x2_t tres2 = vtrnq_s32(l0, l1);
    892 
    893     //combine into 0-3
    894     int16x8_t transp_res0 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
    895     int16x8_t transp_res1 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
    896     int16x8_t transp_res2 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
    897     int16x8_t transp_res3 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));
    898 
    899     // transpose elements 04-37
    900     res0_0 = vget_high_s16(res0);
    901     res1_0 = vget_high_s16(res1);
    902     tres = vtrn_s16(res0_0, res1_0);
    903     l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
    904 
    905     res0_0 = vget_high_s16(res2);
    906     res1_0 = vget_high_s16(res3);
    907 
    908     tres = vtrn_s16(res0_0, res1_0);
    909     l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
    910 
    911     tres1 = vtrnq_s32(l0, l1);
    912 
    913     // transpose elements 44-77
    914     res0_0 = vget_high_s16(res4);
    915     res1_0 = vget_high_s16(res5);
    916     tres = vtrn_s16(res0_0, res1_0);
    917     l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
    918 
    919     res0_0 = vget_high_s16(res6);
    920     res1_0 = vget_high_s16(res7);
    921 
    922     tres = vtrn_s16(res0_0, res1_0);
    923     l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
    924 
    925     tres2 = vtrnq_s32(l0, l1);
    926 
    927     //combine into 4-7
    928     int16x8_t transp_res4 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
    929     int16x8_t transp_res5 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
    930     int16x8_t transp_res6 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
    931     int16x8_t transp_res7 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));
    932 
    933     //special hack for vqdmulhq_s16 command that is producing -1 instead of 0
    934 #define STORE_DESCALED(addr, reg, mul_addr)            postscale_line = vld1q_s16((mul_addr)); \
    935 mask = vreinterpretq_s16_u16(vcltq_s16((reg), z)); \
    936 reg = vabsq_s16(reg); \
    937 reg = vqdmulhq_s16(vqaddq_s16((reg), (reg)), postscale_line); \
    938 reg = vsubq_s16(veorq_s16(reg, mask), mask); \
    939 vst1q_s16((addr), reg);
    940 
    941     int16x8_t z = vdupq_n_s16(0), postscale_line, mask;
    942 
    943     // pass 2: process columns
    944     x0 = transp_res0;    x1 = transp_res7;
    945     x2 = transp_res3;    x3 = transp_res4;
    946 
    947     x4 = vaddq_s16(x0, x1);   x0 = vsubq_s16(x0, x1);
    948     x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
    949 
    950     t1 = x0; t2 = x2;
    951 
    952     x2 = vaddq_s16(x4, x1);    x4 = vsubq_s16(x4, x1);
    953 
    954     x0 = transp_res1;
    955     x3 = transp_res6;
    956 
    957     x1 = vaddq_s16(x0, x3);    x0 = vsubq_s16(x0, x3);
    958 
    959     t3 = x0;
    960 
    961     x0 = transp_res2; x3 = transp_res5;
    962 
    963     t4 = vsubq_s16(x0, x3);
    964 
    965     x0 = vaddq_s16(x0, x3);
    966 
    967     x3 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
    968     x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
    969 
    970     STORE_DESCALED(dst, x1, postscale);
    971     STORE_DESCALED(dst + 4*8, x2, postscale + 4*8);
    972 
    973     x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));
    974 
    975     x1 = vaddq_s16(x4, x0);    x4 = vsubq_s16(x4, x0);
    976 
    977     STORE_DESCALED(dst + 2*8, x4,postscale + 2*8);
    978     STORE_DESCALED(dst + 6*8, x1,postscale + 6*8);
    979 
    980     x0 = t2; x1 = t4;
    981     x2 = t3; x3 = t1;
    982 
    983     x0 = vaddq_s16(x0, x1);    x1 = vaddq_s16(x1, x2);    x2 = vaddq_s16(x2, x3);
    984 
    985     x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));
    986 
    987     x4 = vaddq_s16(x1, x3);    x3 = vsubq_s16(x3, x1);
    988 
    989     x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
    990     x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
    991     x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);
    992 
    993     x1 = vaddq_s16(x0, x3);    x3 = vsubq_s16(x3, x0);
    994     x0 = vaddq_s16(x4, x2);    x4 = vsubq_s16(x4, x2);
    995 
    996     STORE_DESCALED(dst + 5*8, x1,postscale + 5*8);
    997     STORE_DESCALED(dst + 1*8, x0,postscale + 1*8);
    998     STORE_DESCALED(dst + 7*8, x4,postscale + 7*8);
    999     STORE_DESCALED(dst + 3*8, x3,postscale + 3*8);
   1000 }
   1001 
   1002 #else
   1003 // FDCT with postscaling
   1004 static void aan_fdct8x8( const short *src, short *dst,
   1005                         int step, const short *postscale )
   1006 {
   1007     int workspace[64], *work = workspace;
   1008     int  i;
   1009 
   1010     // Pass 1: process rows
   1011     for( i = 8; i > 0; i--, src += step, work += 8 )
   1012     {
   1013         int x0 = src[0], x1 = src[7];
   1014         int x2 = src[3], x3 = src[4];
   1015 
   1016         int x4 = x0 + x1; x0 -= x1;
   1017         x1 = x2 + x3; x2 -= x3;
   1018 
   1019         work[7] = x0; work[1] = x2;
   1020         x2 = x4 + x1; x4 -= x1;
   1021 
   1022         x0 = src[1]; x3 = src[6];
   1023         x1 = x0 + x3; x0 -= x3;
   1024         work[5] = x0;
   1025 
   1026         x0 = src[2]; x3 = src[5];
   1027         work[3] = x0 - x3; x0 += x3;
   1028 
   1029         x3 = x0 + x1; x0 -= x1;
   1030         x1 = x2 + x3; x2 -= x3;
   1031 
   1032         work[0] = x1; work[4] = x2;
   1033 
   1034         x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
   1035         x1 = x4 + x0; x4 -= x0;
   1036         work[2] = x4; work[6] = x1;
   1037 
   1038         x0 = work[1]; x1 = work[3];
   1039         x2 = work[5]; x3 = work[7];
   1040 
   1041         x0 += x1; x1 += x2; x2 += x3;
   1042         x1 = DCT_DESCALE(x1*C0_707, fixb);
   1043 
   1044         x4 = x1 + x3; x3 -= x1;
   1045         x1 = (x0 - x2)*C0_382;
   1046         x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
   1047         x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);
   1048 
   1049         x1 = x0 + x3; x3 -= x0;
   1050         x0 = x4 + x2; x4 -= x2;
   1051 
   1052         work[5] = x1; work[1] = x0;
   1053         work[7] = x4; work[3] = x3;
   1054     }
   1055 
   1056     work = workspace;
   1057     // pass 2: process columns
   1058     for( i = 8; i > 0; i--, work++, postscale += 8, dst += 8 )
   1059     {
   1060         int  x0 = work[8*0], x1 = work[8*7];
   1061         int  x2 = work[8*3], x3 = work[8*4];
   1062 
   1063         int  x4 = x0 + x1; x0 -= x1;
   1064         x1 = x2 + x3; x2 -= x3;
   1065 
   1066         work[8*7] = x0; work[8*0] = x2;
   1067         x2 = x4 + x1; x4 -= x1;
   1068 
   1069         x0 = work[8*1]; x3 = work[8*6];
   1070         x1 = x0 + x3; x0 -= x3;
   1071         work[8*4] = x0;
   1072 
   1073         x0 = work[8*2]; x3 = work[8*5];
   1074         work[8*3] = x0 - x3; x0 += x3;
   1075 
   1076         x3 = x0 + x1; x0 -= x1;
   1077         x1 = x2 + x3; x2 -= x3;
   1078 
   1079         dst[0] = (short)DCT_DESCALE(x1*postscale[0], postshift);
   1080         dst[4] = (short)DCT_DESCALE(x2*postscale[4], postshift);
   1081 
   1082         x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
   1083         x1 = x4 + x0; x4 -= x0;
   1084 
   1085         dst[2] = (short)DCT_DESCALE(x4*postscale[2], postshift);
   1086         dst[6] = (short)DCT_DESCALE(x1*postscale[6], postshift);
   1087 
   1088         x0 = work[8*0]; x1 = work[8*3];
   1089         x2 = work[8*4]; x3 = work[8*7];
   1090 
   1091         x0 += x1; x1 += x2; x2 += x3;
   1092         x1 = DCT_DESCALE(x1*C0_707, fixb);
   1093 
   1094         x4 = x1 + x3; x3 -= x1;
   1095         x1 = (x0 - x2)*C0_382;
   1096         x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
   1097         x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);
   1098 
   1099         x1 = x0 + x3; x3 -= x0;
   1100         x0 = x4 + x2; x4 -= x2;
   1101 
   1102         dst[5] = (short)DCT_DESCALE(x1*postscale[5], postshift);
   1103         dst[1] = (short)DCT_DESCALE(x0*postscale[1], postshift);
   1104         dst[7] = (short)DCT_DESCALE(x4*postscale[7], postshift);
   1105         dst[3] = (short)DCT_DESCALE(x3*postscale[3], postshift);
   1106     }
   1107 }
   1108 #endif
   1109 
   1110 void MotionJpegWriter::writeFrameData( const uchar* data, int step, int colorspace, int input_channels )
   1111 {
   1112     //double total_cvt = 0, total_dct = 0;
   1113     static bool init_cat_table = false;
   1114     const int CAT_TAB_SIZE = 4096;
   1115     static uchar cat_table[CAT_TAB_SIZE*2+1];
   1116     if( !init_cat_table )
   1117     {
   1118         for( int i = -CAT_TAB_SIZE; i <= CAT_TAB_SIZE; i++ )
   1119         {
   1120             Cv32suf a;
   1121             a.f = (float)i;
   1122             cat_table[i+CAT_TAB_SIZE] = ((a.i >> 23) & 255) - (126 & (i ? -1 : 0));
   1123         }
   1124         init_cat_table = true;
   1125     }
   1126 
   1127     //double total_dct = 0, total_cvt = 0;
   1128     CV_Assert( data && width > 0 && height > 0 );
   1129 
   1130     // encode the header and tables
   1131     // for each mcu:
   1132     //   convert rgb to yuv with downsampling (if color).
   1133     //   for every block:
   1134     //     calc dct and quantize
   1135     //     encode block.
   1136     int x, y;
   1137     int i, j;
   1138     const int max_quality = 12;
   1139     short fdct_qtab[2][64];
   1140     unsigned huff_dc_tab[2][16];
   1141     unsigned huff_ac_tab[2][256];
   1142 
   1143     int  x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
   1144     int  dc_pred[] = { 0, 0, 0 };
   1145     int  x_step = x_scale * 8;
   1146     int  y_step = y_scale * 8;
   1147     short  block[6][64];
   1148     short  buffer[4096];
   1149     int*   hbuffer = (int*)buffer;
   1150     int  luma_count = x_scale*y_scale;
   1151     int  block_count = luma_count + channels - 1;
   1152     int  Y_step = x_scale*8;
   1153     const int UV_step = 16;
   1154     int u_plane_ofs = step*height;
   1155     int v_plane_ofs = u_plane_ofs + step*height;
   1156     double _quality = quality*0.01*max_quality;
   1157 
   1158     if( _quality < 1. ) _quality = 1.;
   1159     if( _quality > max_quality ) _quality = max_quality;
   1160 
   1161     double inv_quality = 1./_quality;
   1162 
   1163     // Encode header
   1164     strm.putBytes( (const uchar*)jpegHeader, sizeof(jpegHeader) - 1 );
   1165 
   1166     // Encode quantization tables
   1167     for( i = 0; i < (channels > 1 ? 2 : 1); i++ )
   1168     {
   1169         const uchar* qtable = i == 0 ? jpegTableK1_T : jpegTableK2_T;
   1170         int chroma_scale = i > 0 ? luma_count : 1;
   1171 
   1172         strm.jputShort( 0xffdb );   // DQT marker
   1173         strm.jputShort( 2 + 65*1 ); // put single qtable
   1174         strm.putByte( 0*16 + i );   // 8-bit table
   1175 
   1176         // put coefficients
   1177         for( j = 0; j < 64; j++ )
   1178         {
   1179             int idx = zigzag[j];
   1180             int qval = cvRound(qtable[idx]*inv_quality);
   1181             if( qval < 1 )
   1182                 qval = 1;
   1183             if( qval > 255 )
   1184                 qval = 255;
   1185             fdct_qtab[i][idx] = (short)(cvRound((1 << (postshift + 11)))/
   1186                                 (qval*chroma_scale*idct_prescale[idx]));
   1187             strm.putByte( qval );
   1188         }
   1189     }
   1190 
   1191     // Encode huffman tables
   1192     for( i = 0; i < (channels > 1 ? 4 : 2); i++ )
   1193     {
   1194         const uchar* htable = i == 0 ? jpegTableK3 : i == 1 ? jpegTableK5 :
   1195         i == 2 ? jpegTableK4 : jpegTableK6;
   1196         int is_ac_tab = i & 1;
   1197         int idx = i >= 2;
   1198         int tableSize = 16 + (is_ac_tab ? 162 : 12);
   1199 
   1200         strm.jputShort( 0xFFC4 );      // DHT marker
   1201         strm.jputShort( 3 + tableSize ); // define one huffman table
   1202         strm.putByte( is_ac_tab*16 + idx ); // put DC/AC flag and table index
   1203         strm.putBytes( htable, tableSize ); // put table
   1204 
   1205         BitStream::createEncodeHuffmanTable( BitStream::createSourceHuffmanTable(
   1206                                             htable, hbuffer, 16, 9 ), is_ac_tab ? huff_ac_tab[idx] :
   1207                                             huff_dc_tab[idx], is_ac_tab ? 256 : 16 );
   1208     }
   1209 
   1210     // put frame header
   1211     strm.jputShort( 0xFFC0 );          // SOF0 marker
   1212     strm.jputShort( 8 + 3*channels );  // length of frame header
   1213     strm.putByte( 8 );               // sample precision
   1214     strm.jputShort( height );
   1215     strm.jputShort( width );
   1216     strm.putByte( channels );        // number of components
   1217 
   1218     for( i = 0; i < channels; i++ )
   1219     {
   1220         strm.putByte( i + 1 );  // (i+1)-th component id (Y,U or V)
   1221         if( i == 0 )
   1222             strm.putByte(x_scale*16 + y_scale); // chroma scale factors
   1223         else
   1224             strm.putByte(1*16 + 1);
   1225         strm.putByte( i > 0 ); // quantization table idx
   1226     }
   1227 
   1228     // put scan header
   1229     strm.jputShort( 0xFFDA );          // SOS marker
   1230     strm.jputShort( 6 + 2*channels );  // length of scan header
   1231     strm.putByte( channels );          // number of components in the scan
   1232 
   1233     for( i = 0; i < channels; i++ )
   1234     {
   1235         strm.putByte( i+1 );             // component id
   1236         strm.putByte( (i>0)*16 + (i>0) );// selection of DC & AC tables
   1237     }
   1238 
   1239     strm.jputShort(0*256 + 63); // start and end of spectral selection - for
   1240     // sequential DCT start is 0 and end is 63
   1241 
   1242     strm.putByte( 0 );  // successive approximation bit position
   1243     // high & low - (0,0) for sequential DCT
   1244     unsigned currval = 0, code = 0, tempval = 0;
   1245     int bit_idx = 32;
   1246 
   1247 #define JPUT_BITS(val, bits) \
   1248     bit_idx -= (bits); \
   1249     tempval = (val) & bit_mask[(bits)]; \
   1250     if( bit_idx <= 0 ) \
   1251     {  \
   1252         strm.jput(currval | ((unsigned)tempval >> -bit_idx)); \
   1253         bit_idx += 32; \
   1254         currval = bit_idx < 32 ? (tempval << bit_idx) : 0; \
   1255     } \
   1256     else \
   1257         currval |= (tempval << bit_idx)
   1258 
   1259 #define JPUT_HUFF(val, table) \
   1260     code = table[(val) + 2]; \
   1261     JPUT_BITS(code >> 8, (int)(code & 255))
   1262 
   1263     // encode data
   1264     for( y = 0; y < height; y += y_step, data += y_step*step )
   1265     {
   1266         for( x = 0; x < width; x += x_step )
   1267         {
   1268             int x_limit = x_step;
   1269             int y_limit = y_step;
   1270             const uchar* pix_data = data + x*input_channels;
   1271             short* Y_data = block[0];
   1272 
   1273             if( x + x_limit > width ) x_limit = width - x;
   1274             if( y + y_limit > height ) y_limit = height - y;
   1275 
   1276             memset( block, 0, block_count*64*sizeof(block[0][0]));
   1277 
   1278             if( channels > 1 )
   1279             {
   1280                 short* UV_data = block[luma_count];
   1281                 // double t = (double)cv::getTickCount();
   1282 
   1283                 if( colorspace == COLORSPACE_YUV444P && y_limit == 16 && x_limit == 16 )
   1284                 {
   1285                     for( i = 0; i < y_limit; i += 2, pix_data += step*2, Y_data += Y_step*2, UV_data += UV_step )
   1286                     {
   1287 #ifdef WITH_NEON
   1288                         {
   1289                             uint16x8_t masklo = vdupq_n_u16(255);
   1290                             uint16x8_t lane = vld1q_u16((unsigned short*)(pix_data+v_plane_ofs));
   1291                             uint16x8_t t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
   1292                             lane = vld1q_u16((unsigned short*)(pix_data + v_plane_ofs + step));
   1293                             uint16x8_t t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
   1294                             t1 = vaddq_u16(t1, t2);
   1295                             vst1q_s16(UV_data, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
   1296 
   1297                             lane = vld1q_u16((unsigned short*)(pix_data+u_plane_ofs));
   1298                             t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
   1299                             lane = vld1q_u16((unsigned short*)(pix_data + u_plane_ofs + step));
   1300                             t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
   1301                             t1 = vaddq_u16(t1, t2);
   1302                             vst1q_s16(UV_data + 8, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
   1303                         }
   1304 
   1305                         {
   1306                             int16x8_t lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data)));
   1307                             int16x8_t delta = vdupq_n_s16(128);
   1308                             lane = vsubq_s16(lane, delta);
   1309                             vst1q_s16(Y_data, lane);
   1310 
   1311                             lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+8)));
   1312                             lane = vsubq_s16(lane, delta);
   1313                             vst1q_s16(Y_data + 8, lane);
   1314 
   1315                             lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+step)));
   1316                             lane = vsubq_s16(lane, delta);
   1317                             vst1q_s16(Y_data+Y_step, lane);
   1318 
   1319                             lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data + step + 8)));
   1320                             lane = vsubq_s16(lane, delta);
   1321                             vst1q_s16(Y_data+Y_step + 8, lane);
   1322                         }
   1323 #else
   1324                         for( j = 0; j < x_limit; j += 2, pix_data += 2 )
   1325                         {
   1326                             Y_data[j] = pix_data[0] - 128;
   1327                             Y_data[j+1] = pix_data[1] - 128;
   1328                             Y_data[j+Y_step] = pix_data[step] - 128;
   1329                             Y_data[j+Y_step+1] = pix_data[step+1] - 128;
   1330 
   1331                             UV_data[j>>1] = pix_data[v_plane_ofs] + pix_data[v_plane_ofs+1] +
   1332                                 pix_data[v_plane_ofs+step] + pix_data[v_plane_ofs+step+1] - 128*4;
   1333                             UV_data[(j>>1)+8] = pix_data[u_plane_ofs] + pix_data[u_plane_ofs+1] +
   1334                                 pix_data[u_plane_ofs+step] + pix_data[u_plane_ofs+step+1] - 128*4;
   1335 
   1336                         }
   1337 
   1338                         pix_data -= x_limit*input_channels;
   1339 #endif
   1340                     }
   1341                 }
   1342                 else
   1343                 {
   1344                     for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
   1345                     {
   1346                         for( j = 0; j < x_limit; j++, pix_data += input_channels )
   1347                         {
   1348                             int Y, U, V;
   1349 
   1350                             if( colorspace == COLORSPACE_BGR )
   1351                             {
   1352                                 int r = pix_data[2];
   1353                                 int g = pix_data[1];
   1354                                 int b = pix_data[0];
   1355 
   1356                                 Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
   1357                                 U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
   1358                                 V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
   1359                             }
   1360                             else if( colorspace == COLORSPACE_RGBA )
   1361                             {
   1362                                 int r = pix_data[0];
   1363                                 int g = pix_data[1];
   1364                                 int b = pix_data[2];
   1365 
   1366                                 Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
   1367                                 U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
   1368                                 V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
   1369                             }
   1370                             else
   1371                             {
   1372                                 Y = pix_data[0] - 128;
   1373                                 U = pix_data[v_plane_ofs] - 128;
   1374                                 V = pix_data[u_plane_ofs] - 128;
   1375                             }
   1376 
   1377                             int j2 = j >> (x_scale - 1);
   1378                             Y_data[j] = (short)Y;
   1379                             UV_data[j2] = (short)(UV_data[j2] + U);
   1380                             UV_data[j2 + 8] = (short)(UV_data[j2 + 8] + V);
   1381                         }
   1382 
   1383                         pix_data -= x_limit*input_channels;
   1384                         if( ((i+1) & (y_scale - 1)) == 0 )
   1385                         {
   1386                             UV_data += UV_step;
   1387                         }
   1388                     }
   1389                 }
   1390 
   1391                 // total_cvt += (double)cv::getTickCount() - t;
   1392             }
   1393             else
   1394             {
   1395                 for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
   1396                 {
   1397                     for( j = 0; j < x_limit; j++ )
   1398                         Y_data[j] = (short)(pix_data[j]*4 - 128*4);
   1399                 }
   1400             }
   1401 
   1402             for( i = 0; i < block_count; i++ )
   1403             {
   1404                 int is_chroma = i >= luma_count;
   1405                 int src_step = x_scale * 8;
   1406                 int run = 0, val;
   1407                 const short* src_ptr = block[i & -2] + (i & 1)*8;
   1408                 const unsigned* htable = huff_ac_tab[is_chroma];
   1409 
   1410                 //double t = (double)cv::getTickCount();
   1411                 aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );
   1412                 //total_dct += (double)cv::getTickCount() - t;
   1413 
   1414                 j = is_chroma + (i > luma_count);
   1415                 val = buffer[0] - dc_pred[j];
   1416                 dc_pred[j] = buffer[0];
   1417 
   1418                 {
   1419                     int cat = cat_table[val + CAT_TAB_SIZE];
   1420 
   1421                     //CV_Assert( cat <= 11 );
   1422                     JPUT_HUFF( cat, huff_dc_tab[is_chroma] );
   1423                     JPUT_BITS( val - (val < 0 ? 1 : 0), cat );
   1424                 }
   1425 
   1426                 for( j = 1; j < 64; j++ )
   1427                 {
   1428                     val = buffer[zigzag[j]];
   1429 
   1430                     if( val == 0 )
   1431                     {
   1432                         run++;
   1433                     }
   1434                     else
   1435                     {
   1436                         while( run >= 16 )
   1437                         {
   1438                             JPUT_HUFF( 0xF0, htable ); // encode 16 zeros
   1439                             run -= 16;
   1440                         }
   1441 
   1442                         {
   1443                             int cat = cat_table[val + CAT_TAB_SIZE];
   1444                             //CV_Assert( cat <= 10 );
   1445                             JPUT_HUFF( cat + run*16, htable );
   1446                             JPUT_BITS( val - (val < 0 ? 1 : 0), cat );
   1447                         }
   1448 
   1449                         run = 0;
   1450                     }
   1451                 }
   1452 
   1453                 if( run )
   1454                 {
   1455                     JPUT_HUFF( 0x00, htable ); // encode EOB
   1456                 }
   1457             }
   1458         }
   1459     }
   1460 
   1461     // Flush
   1462     strm.jflush(currval, bit_idx);
   1463     strm.jputShort( 0xFFD9 ); // EOI marker
   1464     /*printf("total dct = %.1fms, total cvt = %.1fms\n",
   1465      total_dct*1000./cv::getTickFrequency(),
   1466      total_cvt*1000./cv::getTickFrequency());*/
   1467     size_t pos = strm.getPos();
   1468     size_t pos1 = (pos + 3) & ~3;
   1469     for( ; pos < pos1; pos++ )
   1470         strm.putByte(0);
   1471 }
   1472 
   1473 }
   1474 
   1475 Ptr<IVideoWriter> createMotionJpegWriter( const String& filename, double fps, Size frameSize, bool iscolor )
   1476 {
   1477     Ptr<IVideoWriter> iwriter = makePtr<mjpeg::MotionJpegWriter>(filename, fps, frameSize, iscolor);
   1478     if( !iwriter->isOpened() )
   1479         iwriter.release();
   1480     return iwriter;
   1481 }
   1482 
   1483 }
   1484