Home | History | Annotate | Download | only in common
      1 
      2 /********************************************************************
      3  *                                                                  *
      4  * THIS FILE IS PART OF THE 'ZYWRLE' VNC CODEC SOURCE CODE.         *
      5  *                                                                  *
      6  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
      7  * GOVERNED BY A FOLLOWING BSD-STYLE SOURCE LICENSE.                *
      8  * PLEASE READ THESE TERMS BEFORE DISTRIBUTING.                     *
      9  *                                                                  *
     10  * THE 'ZYWRLE' VNC CODEC SOURCE CODE IS (C) COPYRIGHT 2006         *
     11  * BY Hitachi Systems & Services, Ltd.                              *
     12  * (Noriaki Yamazaki, Research & Developement Center)               *                                                                 *
     13  *                                                                  *
     14  ********************************************************************
     15 Redistribution and use in source and binary forms, with or without
     16 modification, are permitted provided that the following conditions
     17 are met:
     18 
     19 - Redistributions of source code must retain the above copyright
     20 notice, this list of conditions and the following disclaimer.
     21 
     22 - Redistributions in binary form must reproduce the above copyright
     23 notice, this list of conditions and the following disclaimer in the
     24 documentation and/or other materials provided with the distribution.
     25 
     26 - Neither the name of the Hitachi Systems & Services, Ltd. nor
     27 the names of its contributors may be used to endorse or promote
     28 products derived from this software without specific prior written
     29 permission.
     30 
     31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     32 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     33 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     34 A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION
     35 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     36 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     37 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     38 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     39 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     40 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     41 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     42  ********************************************************************/
     43 
     44 /* Change Log:
     45      V0.02 : 2008/02/04 : Fix mis encode/decode when width != scanline
     46 	                     (Thanks Johannes Schindelin, author of LibVNC
     47 						  Server/Client)
     48      V0.01 : 2007/02/06 : Initial release
     49 */
     50 
/*
 Build-time switches.
 ZYWRLE_ENCODE / ZYWRLE_DECODE are intentionally left commented out here:
 the code below only tests them with #ifdef, so presumably the including
 file defines whichever side(s) it needs before including this template.
 ZYWRLE_QUANTIZE selects the "Type B" table-driven non-linear quantization
 filter; when it is not defined the "Type A" bit-masking filter is compiled.
*/
/* #define ZYWRLE_ENCODE */
/* #define ZYWRLE_DECODE */
#define ZYWRLE_QUANTIZE
     54 
     55 /*
     56 [References]
     57  PLHarr:
     58    Senecal, J. G., P. Lindstrom, M. A. Duchaineau, and K. I. Joy, "An Improved N-Bit to N-Bit Reversible Haar-Like Transform," Pacific Graphics 2004, October 2004, pp. 371-380.
     59  EZW:
     60    Shapiro, JM: Embedded Image Coding Using Zerotrees of Wavelet Coefficients, IEEE Trans. Signal. Process., Vol.41, pp.3445-3462 (1993).
     61 */
     62 
     63 
/*
 Template macro plumbing.
 Every name below is built by pasting BPP (plus END_FIX for the functions)
 onto a base identifier, so each inclusion of this file yields one set of
 functions/macros for one pixel format.
 BPP, END_FIX and the __RFB_CONCAT2E/__RFB_CONCAT3E helpers are not defined
 in this file; presumably the includer provides them before each inclusion.
*/
#undef ZYWRLE_ANALYZE
#undef ZYWRLE_SYNTHESIZE
#define ZYWRLE_ANALYZE __RFB_CONCAT3E(zywrleAnalyze,BPP,END_FIX)
#define ZYWRLE_SYNTHESIZE __RFB_CONCAT3E(zywrleSynthesize,BPP,END_FIX)

/* Per-format colour conversion functions, masks and pixel accessors. */
#define ZYWRLE_RGBYUV __RFB_CONCAT3E(zywrleRGBYUV,BPP,END_FIX)
#define ZYWRLE_YUVRGB __RFB_CONCAT3E(zywrleYUVRGB,BPP,END_FIX)
#define ZYWRLE_YMASK __RFB_CONCAT2E(ZYWRLE_YMASK,BPP)
#define ZYWRLE_UVMASK __RFB_CONCAT2E(ZYWRLE_UVMASK,BPP)
#define ZYWRLE_LOAD_PIXEL __RFB_CONCAT2E(ZYWRLE_LOAD_PIXEL,BPP)
#define ZYWRLE_SAVE_PIXEL __RFB_CONCAT2E(ZYWRLE_SAVE_PIXEL,BPP)
     76 
/* Pixel packing/unpacking: endian-dependent byte indices.
   S_0/S_1 are the byte offsets, inside a 16-bit pixel, used by the
   15/16-bit load/save macros (S_0 is byte 0 in the little-endian branch).
   L_0/L_1/L_2 are the byte offsets the 32-bit load/save macros use for the
   B/G/R channels respectively.
   NOTE(review): ZYWRLE_ENDIAN and ENDIAN_BIG are not defined in this file.
   If neither is defined, both evaluate to 0 in the #if below and the
   big-endian branch is selected -- confirm the includer always defines them. */
#undef S_0
#undef S_1
#undef L_0
#undef L_1
#undef L_2
#if ZYWRLE_ENDIAN == ENDIAN_BIG
#  define S_0	1
#  define S_1	0
#  define L_0	3
#  define L_1	2
#  define L_2	1
#else
#  define S_0	0
#  define S_1	1
#  define L_0	0
#  define L_1	1
#  define L_2	2
#endif
     97 
     98 /*   Load/Save pixel stuffs. */
     99 #define ZYWRLE_YMASK15  0xFFFFFFF8
    100 #define ZYWRLE_UVMASK15 0xFFFFFFF8
    101 #define ZYWRLE_LOAD_PIXEL15(pSrc,R,G,B) { \
    102 	R =  (((unsigned char*)pSrc)[S_1]<< 1)& 0xF8;	\
    103 	G = ((((unsigned char*)pSrc)[S_1]<< 6)|(((unsigned char*)pSrc)[S_0]>> 2))& 0xF8;	\
    104 	B =  (((unsigned char*)pSrc)[S_0]<< 3)& 0xF8;	\
    105 }
    106 #define ZYWRLE_SAVE_PIXEL15(pDst,R,G,B) { \
    107 	R &= 0xF8;	\
    108 	G &= 0xF8;	\
    109 	B &= 0xF8;	\
    110 	((unsigned char*)pDst)[S_1] = (unsigned char)( (R>>1)|(G>>6)       );	\
    111 	((unsigned char*)pDst)[S_0] = (unsigned char)(((B>>3)|(G<<2))& 0xFF);	\
    112 }
    113 #define ZYWRLE_YMASK16  0xFFFFFFFC
    114 #define ZYWRLE_UVMASK16 0xFFFFFFF8
    115 #define ZYWRLE_LOAD_PIXEL16(pSrc,R,G,B) { \
    116 	R =   ((unsigned char*)pSrc)[S_1]     & 0xF8;	\
    117 	G = ((((unsigned char*)pSrc)[S_1]<< 5)|(((unsigned char*)pSrc)[S_0]>> 3))& 0xFC;	\
    118 	B =  (((unsigned char*)pSrc)[S_0]<< 3)& 0xF8;	\
    119 }
    120 #define ZYWRLE_SAVE_PIXEL16(pDst,R,G,B) { \
    121 	R &= 0xF8;	\
    122 	G &= 0xFC;	\
    123 	B &= 0xF8;	\
    124 	((unsigned char*)pDst)[S_1] = (unsigned char)(  R    |(G>>5)       );	\
    125 	((unsigned char*)pDst)[S_0] = (unsigned char)(((B>>3)|(G<<3))& 0xFF);	\
    126 }
    127 #define ZYWRLE_YMASK32  0xFFFFFFFF
    128 #define ZYWRLE_UVMASK32 0xFFFFFFFF
    129 #define ZYWRLE_LOAD_PIXEL32(pSrc,R,G,B) { \
    130 	R = ((unsigned char*)pSrc)[L_2];	\
    131 	G = ((unsigned char*)pSrc)[L_1];	\
    132 	B = ((unsigned char*)pSrc)[L_0];	\
    133 }
    134 #define ZYWRLE_SAVE_PIXEL32(pDst,R,G,B) { \
    135 	((unsigned char*)pDst)[L_2] = (unsigned char)R;	\
    136 	((unsigned char*)pDst)[L_1] = (unsigned char)G;	\
    137 	((unsigned char*)pDst)[L_0] = (unsigned char)B;	\
    138 }
    139 
    140 #ifndef ZYWRLE_ONCE
    141 #define ZYWRLE_ONCE
    142 
/* InlineX: portable spelling of "inline" for the static helpers below.
   Expands to __inline when WIN32 is defined, to plain inline otherwise,
   and to nothing when __STRICT_ANSI__ is defined (non-WIN32 builds). */
#ifdef WIN32
#define InlineX __inline
#else
# ifndef __STRICT_ANSI__
#  define InlineX inline
# else
#  define InlineX
# endif
#endif
    152 
    153 #ifdef ZYWRLE_ENCODE
    154 /* Tables for Coefficients filtering. */
    155 #  ifndef ZYWRLE_QUANTIZE
    156 /* Type A:lower bit omitting of EZW style. */
    157 const static unsigned int zywrleParam[3][3]={
    158 	{0x0000F000,0x00000000,0x00000000},
    159 	{0x0000C000,0x00F0F0F0,0x00000000},
    160 	{0x0000C000,0x00C0C0C0,0x00F0F0F0},
    161 /*	{0x0000FF00,0x00000000,0x00000000},
    162 	{0x0000FF00,0x00FFFFFF,0x00000000},
    163 	{0x0000FF00,0x00FFFFFF,0x00FFFFFF}, */
    164 };
    165 #  else
/*
 Type B: non-linear quantization filter.

 zywrleConv[t][i] is the filtered (quantized then dequantized) value for a
 coefficient byte, where i is that byte reinterpreted as unsigned char (see
 the lookups in FilterWaveletSquare).  Consequently entries 0..127 serve
 non-negative coefficients and entries 128..255 serve negative ones; in
 tables 1..3 the second half holds only non-positive outputs.
 Table 0 maps every input to 0, i.e. it drops the coefficient altogether.
 The bi/bo/r/PSNR note heading each table is the original author's
 parameter annotation.
*/
static const signed char zywrleConv[4][256]={
{	/* bi=5, bo=5 r=0.0:PSNR=24.849 */
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
},
{	/* bi=5, bo=5 r=2.0:PSNR=74.031 */
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 32,
	32, 32, 32, 32, 32, 32, 32, 32,
	32, 32, 32, 32, 32, 32, 32, 32,
	48, 48, 48, 48, 48, 48, 48, 48,
	48, 48, 48, 56, 56, 56, 56, 56,
	56, 56, 56, 56, 64, 64, 64, 64,
	64, 64, 64, 64, 72, 72, 72, 72,
	72, 72, 72, 72, 80, 80, 80, 80,
	80, 80, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 96, 96,
	96, 96, 96, 104, 104, 104, 104, 104,
	104, 104, 104, 104, 104, 112, 112, 112,
	112, 112, 112, 112, 112, 112, 120, 120,
	120, 120, 120, 120, 120, 120, 120, 120,
	0, -120, -120, -120, -120, -120, -120, -120,
	-120, -120, -120, -112, -112, -112, -112, -112,
	-112, -112, -112, -112, -104, -104, -104, -104,
	-104, -104, -104, -104, -104, -104, -96, -96,
	-96, -96, -96, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -80,
	-80, -80, -80, -80, -80, -72, -72, -72,
	-72, -72, -72, -72, -72, -64, -64, -64,
	-64, -64, -64, -64, -64, -56, -56, -56,
	-56, -56, -56, -56, -56, -56, -48, -48,
	-48, -48, -48, -48, -48, -48, -48, -48,
	-48, -32, -32, -32, -32, -32, -32, -32,
	-32, -32, -32, -32, -32, -32, -32, -32,
	-32, -32, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
},
{	/* bi=5, bo=4 r=2.0:PSNR=64.441 */
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	48, 48, 48, 48, 48, 48, 48, 48,
	48, 48, 48, 48, 48, 48, 48, 48,
	48, 48, 48, 48, 48, 48, 48, 48,
	64, 64, 64, 64, 64, 64, 64, 64,
	64, 64, 64, 64, 64, 64, 64, 64,
	80, 80, 80, 80, 80, 80, 80, 80,
	80, 80, 80, 80, 80, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	104, 104, 104, 104, 104, 104, 104, 104,
	104, 104, 104, 112, 112, 112, 112, 112,
	112, 112, 112, 112, 120, 120, 120, 120,
	120, 120, 120, 120, 120, 120, 120, 120,
	0, -120, -120, -120, -120, -120, -120, -120,
	-120, -120, -120, -120, -120, -112, -112, -112,
	-112, -112, -112, -112, -112, -112, -104, -104,
	-104, -104, -104, -104, -104, -104, -104, -104,
	-104, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -80, -80, -80, -80,
	-80, -80, -80, -80, -80, -80, -80, -80,
	-80, -64, -64, -64, -64, -64, -64, -64,
	-64, -64, -64, -64, -64, -64, -64, -64,
	-64, -48, -48, -48, -48, -48, -48, -48,
	-48, -48, -48, -48, -48, -48, -48, -48,
	-48, -48, -48, -48, -48, -48, -48, -48,
	-48, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
},
{	/* bi=5, bo=2 r=2.0:PSNR=43.175 */
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	0, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
}
};
    305 const static signed char* zywrleParam[3][3][3]={
    306 	{{zywrleConv[0],zywrleConv[2],zywrleConv[0]},{zywrleConv[0],zywrleConv[0],zywrleConv[0]},{zywrleConv[0],zywrleConv[0],zywrleConv[0]}},
    307 	{{zywrleConv[0],zywrleConv[3],zywrleConv[0]},{zywrleConv[1],zywrleConv[1],zywrleConv[1]},{zywrleConv[0],zywrleConv[0],zywrleConv[0]}},
    308 	{{zywrleConv[0],zywrleConv[3],zywrleConv[0]},{zywrleConv[2],zywrleConv[2],zywrleConv[2]},{zywrleConv[1],zywrleConv[1],zywrleConv[1]}},
    309 };
    310 #  endif
    311 #endif
    312 
    313 static InlineX void Harr(signed char* pX0, signed char* pX1)
    314 {
    315 	/* Piecewise-Linear Harr(PLHarr) */
    316 	int X0 = (int)*pX0, X1 = (int)*pX1;
    317 	int orgX0 = X0, orgX1 = X1;
    318 	if ((X0 ^ X1) & 0x80) {
    319 		/* differ sign */
    320 		X1 += X0;
    321 		if (((X1^orgX1)&0x80)==0) {
    322 			/* |X1| > |X0| */
    323 			X0 -= X1;	/* H = -B */
    324 		}
    325 	} else {
    326 		/* same sign */
    327 		X0 -= X1;
    328 		if (((X0 ^ orgX0) & 0x80) == 0) {
    329 			/* |X0| > |X1| */
    330 			X1 += X0;	/* L = A */
    331 		}
    332 	}
    333 	*pX0 = (signed char)X1;
    334 	*pX1 = (signed char)X0;
    335 }
    336 /*
    337  1D-Wavelet transform.
    338 
    339  In coefficients array, the famous 'pyramid' decomposition is well used.
    340 
    341  1D Model:
    342    |L0L0L0L0|L0L0L0L0|H0H0H0H0|H0H0H0H0| : level 0
    343    |L1L1L1L1|H1H1H1H1|H0H0H0H0|H0H0H0H0| : level 1
    344 
    345  But this method needs line buffer because H/L is different position from X0/X1.
    346  So, I used 'interleave' decomposition instead of it.
    347 
    348  1D Model:
    349    |L0H0L0H0|L0H0L0H0|L0H0L0H0|L0H0L0H0| : level 0
    350    |L1H0H1H0|L1H0H1H0|L1H0H1H0|L1H0H1H0| : level 1
    351 
    352  In this method, H/L and X0/X1 is always same position.
    353  This lead us to more speed and less memory.
 Of course, both methods produce the same result;
 the only difference is where each coefficient is stored.
    356 */
    357 static InlineX void WaveletLevel(int* data, int size, int l, int SkipPixel)
    358 {
    359 	int s, ofs;
    360 	signed char* pX0;
    361 	signed char* end;
    362 
    363 	pX0 = (signed char*)data;
    364 	s = (8<<l)*SkipPixel;
    365 	end = pX0+(size>>(l+1))*s;
    366 	s -= 2;
    367 	ofs = (4<<l)*SkipPixel;
    368 	while (pX0 < end) {
    369 		Harr(pX0, pX0+ofs);
    370 		pX0++;
    371 		Harr(pX0, pX0+ofs);
    372 		pX0++;
    373 		Harr(pX0, pX0+ofs);
    374 		pX0 += s;
    375 	}
    376 }
    377 #define InvWaveletLevel(d,s,l,pix) WaveletLevel(d,s,l,pix)
    378 
    379 #ifdef ZYWRLE_ENCODE
    380 #  ifndef ZYWRLE_QUANTIZE
/*
 Type A: lower-bit omission in the style of EZW.

 Filters the three high-frequency sub-bands produced by wavelet pass l.

 pBuf   : coefficient buffer, one int per pixel, width*height entries.
 width  : buffer width in coefficients.
 height : buffer height in coefficients.
 level  : requested quality; selects the row zywrleParam[level-1].
 l      : wavelet pass whose sub-bands are filtered.

 Each selected coefficient word is ANDed with the mask zywrleParam[level-1][l];
 negative bytes are biased first so the truncation rounds toward zero.
 NOTE(review): bytes 0..2 of the word are addressed through char pointers
 while the mask is applied to the whole int, which appears to assume that
 byte 0 is the least significant byte (little-endian) -- confirm.
*/
static InlineX void FilterWaveletSquare(int* pBuf, int width, int height, int level, int l)
{
	int r, s;
	int x, y;
	int* pH;
	const unsigned int* pM;

	pM = &(zywrleParam[level-1][l]);
	/* Distance, in coefficients, between two samples of the same sub-band. */
	s = 2<<l;
	/* r selects the sub-band: bit 0 = horizontal offset, bit 1 = vertical offset. */
	for (r = 1; r < 4; r++) {
		pH   = pBuf;
		if (r & 0x01)
			pH +=  s>>1;
		if (r & 0x02)
			pH += (s>>1)*width;
		for (y = 0; y < height / s; y++) {
			for (x = 0; x < width / s; x++) {
				/*
				 This is equivalent to the following code:
				     pH[x] = pH[x] / (~pM[x]+1) * (~pM[x]+1);
				     ( round pH[x] to the bits kept by pM[x] )
				 The '&' operator does not round, it floors.
				 So a negative value must be offset before masking.
				*/
				if (((signed char*)pH)[0] & 0x80)
					((signed char*)pH)[0] += ~((signed char*)pM)[0];
				if (((signed char*)pH)[1] & 0x80)
					((signed char*)pH)[1] += ~((signed char*)pM)[1];
				if (((signed char*)pH)[2] & 0x80)
					((signed char*)pH)[2] += ~((signed char*)pM)[2];
				*pH &= *pM;
				pH += s;
			}
			/* Skip the s-1 rows that belong to other sub-bands. */
			pH += (s-1)*width;
		}
	}
}
    419 #  else
    420 /*
    421  Type B:Non liner quantization filter.
    422 
 Coefficients follow a Gaussian distribution: small values make up
 most of the coefficients but are less important than large values.
 So, I use a filter based on a non-linear quantize/dequantize table.
 In general, the non-linear quantization formula is as follows.
    427 
    428     y=f(x)   = sign(x)*round( ((abs(x)/(2^7))^ r   )* 2^(bo-1) )*2^(8-bo)
    429     x=f-1(y) = sign(y)*round( ((abs(y)/(2^7))^(1/r))* 2^(bi-1) )*2^(8-bi)
    430  ( r:power coefficient  bi:effective MSB in input  bo:effective MSB in output )
    431 
    432    r < 1.0 : Smaller value is more important than larger value.
    433    r > 1.0 : Larger value is more important than smaller value.
    434    r = 1.0 : Liner quantization which is same with EZW style.
    435 
    436  r = 0.75 is famous non liner quantization used in MP3 audio codec.
    437  In contrast to audio data, larger value is important in wavelet coefficients.
    438  So, I select r = 2.0 table( quantize is x^2, dequantize sqrt(x) ).
    439 
    440  As compared with EZW style liner quantization, this filter tended to be
    441  more sharp edge and be more compression rate but be more blocking noise and be less quality.
    442  Especially, the surface of graphic objects has distinguishable noise in middle quality mode.
    443 
    444  We need only quantized-dequantized(filtered) value rather than quantized value itself
    445  because all values are packed or palette-lized in later ZRLE section.
    446  This lead us not to need to modify client decoder when we change
    447  the filtering procedure in future.
    448  Client only decodes coefficients given by encoder.
    449 */
    450 static InlineX void FilterWaveletSquare(int* pBuf, int width, int height, int level, int l)
    451 {
    452 	int r, s;
    453 	int x, y;
    454 	int* pH;
    455 	const signed char** pM;
    456 
    457 	pM = zywrleParam[level-1][l];
    458 	s = 2<<l;
    459 	for (r = 1; r < 4; r++) {
    460 		pH   = pBuf;
    461 		if (r & 0x01)
    462 			pH +=  s>>1;
    463 		if (r & 0x02)
    464 			pH += (s>>1)*width;
    465 		for (y = 0; y < height / s; y++) {
    466 			for (x = 0; x < width / s; x++) {
    467 				((signed char*)pH)[0] = pM[0][((unsigned char*)pH)[0]];
    468 				((signed char*)pH)[1] = pM[1][((unsigned char*)pH)[1]];
    469 				((signed char*)pH)[2] = pM[2][((unsigned char*)pH)[2]];
    470 				pH += s;
    471 			}
    472 			pH += (s-1)*width;
    473 		}
    474 	}
    475 }
    476 #  endif
    477 
    478 static InlineX void Wavelet(int* pBuf, int width, int height, int level)
    479 {
    480 	int l, s;
    481 	int* pTop;
    482 	int* pEnd;
    483 
    484 	for (l = 0; l < level; l++) {
    485 		pTop = pBuf;
    486 		pEnd = pBuf+height*width;
    487 		s = width<<l;
    488 		while (pTop < pEnd) {
    489 			WaveletLevel(pTop, width, l, 1);
    490 			pTop += s;
    491 		}
    492 		pTop = pBuf;
    493 		pEnd = pBuf+width;
    494 		s = 1<<l;
    495 		while (pTop < pEnd) {
    496 			WaveletLevel(pTop, height,l, width);
    497 			pTop += s;
    498 		}
    499 		FilterWaveletSquare(pBuf, width, height, level, l);
    500 	}
    501 }
    502 #endif
    503 #ifdef ZYWRLE_DECODE
    504 static InlineX void InvWavelet(int* pBuf, int width, int height, int level)
    505 {
    506 	int l, s;
    507 	int* pTop;
    508 	int* pEnd;
    509 
    510 	for (l = level - 1; l >= 0; l--) {
    511 		pTop = pBuf;
    512 		pEnd = pBuf+width;
    513 		s = 1<<l;
    514 		while (pTop < pEnd) {
    515 			InvWaveletLevel(pTop, height,l, width);
    516 			pTop += s;
    517 		}
    518 		pTop = pBuf;
    519 		pEnd = pBuf+height*width;
    520 		s = width<<l;
    521 		while (pTop < pEnd) {
    522 			InvWaveletLevel(pTop, width, l, 1);
    523 			pTop += s;
    524 		}
    525 	}
    526 }
    527 #endif
    528 
    529 /* Load/Save coefficients stuffs.
    530  Coefficients manages as 24 bits little-endian pixel. */
    531 #define ZYWRLE_LOAD_COEFF(pSrc,R,G,B) { \
    532 	R = ((signed char*)pSrc)[2];	\
    533 	G = ((signed char*)pSrc)[1];	\
    534 	B = ((signed char*)pSrc)[0];	\
    535 }
    536 #define ZYWRLE_SAVE_COEFF(pDst,R,G,B) { \
    537 	((signed char*)pDst)[2] = (signed char)R;	\
    538 	((signed char*)pDst)[1] = (signed char)G;	\
    539 	((signed char*)pDst)[0] = (signed char)B;	\
    540 }
    541 
    542 /*
    543  RGB <=> YUV conversion stuffs.
 Strictly speaking, YUV conversion is defined by the following formulas:
    545    Y =  0.299R + 0.587G + 0.114B (   0<=Y<=255)
    546    U = -0.169R - 0.331G + 0.500B (-128<=U<=127)
    547    V =  0.500R - 0.419G - 0.081B (-128<=V<=127)
    548 
    549  I use simple conversion RCT(reversible color transform) which is described
    550  in JPEG-2000 specification.
    551    Y = (R + 2G + B)/4 (   0<=Y<=255)
    552    U = B-G (-256<=U<=255)
    553    V = R-G (-256<=V<=255)
    554 */
    555 #define ROUND(x) (((x)<0)?0:(((x)>255)?255:(x)))
    556 	/* RCT is N-bit RGB to N-bit Y and N+1-bit UV.
    557 	 For make Same N-bit, UV is lossy.
    558 	 More exact PLHarr, we reduce to odd range(-127<=x<=127). */
    559 #define ZYWRLE_RGBYUV1(R,G,B,Y,U,V,ymask,uvmask) { \
    560 	Y = (R+(G<<1)+B)>>2;	\
    561 	U =  B-G;	\
    562 	V =  R-G;	\
    563 	Y -= 128;	\
    564 	U >>= 1;	\
    565 	V >>= 1;	\
    566 	Y &= ymask;	\
    567 	U &= uvmask;	\
    568 	V &= uvmask;	\
    569 	if (Y == -128)	\
    570 		Y += (0xFFFFFFFF-ymask+1);	\
    571 	if (U == -128)	\
    572 		U += (0xFFFFFFFF-uvmask+1);	\
    573 	if (V == -128)	\
    574 		V += (0xFFFFFFFF-uvmask+1);	\
    575 }
    576 #define ZYWRLE_YUVRGB1(R,G,B,Y,U,V) { \
    577 	Y += 128;	\
    578 	U <<= 1;	\
    579 	V <<= 1;	\
    580 	G = Y-((U+V)>>2);	\
    581 	B = U+G;	\
    582 	R = V+G;	\
    583 	G = ROUND(G);	\
    584 	B = ROUND(B);	\
    585 	R = ROUND(R);	\
    586 }
    587 
    588 /*
    589  coefficient packing/unpacking stuffs.
    590  Wavelet transform makes 4 sub coefficient image from 1 original image.
    591 
    592  model with pyramid decomposition:
    593    +------+------+
    594    |      |      |
    595    |  L   |  Hx  |
    596    |      |      |
    597    +------+------+
    598    |      |      |
    599    |  H   |  Hxy |
    600    |      |      |
    601    +------+------+
    602 
 Strictly speaking, each sub-image should be transferred individually.
 But as far as ZRLE is concerned, the single stacked image below is
 equivalent to the individual sub-images above, so I use this format.
 (Strictly speaking, the transfer order is reversed (Hxy->Hy->Hx->L)
  to keep the procedure simple for any wavelet level.)
    608 
    609    +------+------+
    610    |      L      |
    611    +------+------+
    612    |      Hx     |
    613    +------+------+
    614    |      Hy     |
    615    +------+------+
    616    |      Hxy    |
    617    +------+------+
    618 */
/*
 The macros below expand to bare statements and use variables of the
 expanding function directly: w, h, uw, uh, scanline, pBuf, pData, pTop,
 pEnd, pLine, pH and s must all be in scope with the expected types.
*/

/*
 Advance the pixel pointer 'data' by one.  pData marks the start of the
 current row: once w+uw pixels have been consumed since pData, skip the
 remaining scanline-(w+uw) pixels of the row and make pData the new row start.
 Expands to two statements, so it must only be used inside a braced block.
*/
#define INC_PTR(data) \
	data++;	\
	if( data-pData >= (w+uw) ){	\
		data += scanline-(w+uw);	\
		pData = data;	\
	}

/*
 Visit every coefficient of sub-band r (bit 0: horizontal offset, bit 1:
 vertical offset) of wavelet pass 'level' inside pBuf, stepping 2<<level
 coefficients horizontally and as many rows vertically.  For each one the
 statements in TRANS are executed (with pH pointing at the coefficient) and
 'data' is advanced with INC_PTR.
*/
#define ZYWRLE_TRANSFER_COEFF(pBuf,data,r,w,h,scanline,level,TRANS)	\
	pH = pBuf;	\
	s = 2<<level;	\
	if (r & 0x01)	\
		pH +=  s>>1;	\
	if (r & 0x02)	\
		pH += (s>>1)*w;	\
	pEnd = pH+h*w;	\
	while (pH < pEnd) {	\
		pLine = pH+w;	\
		while (pH < pLine) {	\
			TRANS	\
			INC_PTR(data)	\
			pH += s;	\
		}	\
		pH += (s-1)*w;	\
	}

/* Copy one sub-band from the coefficient buffer into the pixel stream. */
#define ZYWRLE_PACK_COEFF(pBuf,data,r,width,height,scanline,level)	\
	ZYWRLE_TRANSFER_COEFF(pBuf,data,r,width,height,scanline,level,ZYWRLE_LOAD_COEFF(pH,R,G,B);ZYWRLE_SAVE_PIXEL(data,R,G,B);)

/* Copy one sub-band from the pixel stream into the coefficient buffer. */
#define ZYWRLE_UNPACK_COEFF(pBuf,data,r,width,height,scanline,level)	\
	ZYWRLE_TRANSFER_COEFF(pBuf,data,r,width,height,scanline,level,ZYWRLE_LOAD_PIXEL(data,R,G,B);ZYWRLE_SAVE_COEFF(pH,R,G,B);)

/*
 Walk the tail of pBuf that lies beyond the aligned w*h coefficients, up to
 (w+uw)*(h+uh) entries, running TRANS for each entry (pTop is the current
 entry) and advancing 'data' sequentially with INC_PTR.
*/
#define ZYWRLE_SAVE_UNALIGN(data,TRANS)	\
	pTop = pBuf+w*h;	\
	pEnd = pBuf + (w+uw)*(h+uh);	\
	while (pTop < pEnd) {	\
		TRANS	\
		INC_PTR(data)	\
		pTop++;	\
	}

/*
 Walk the pixels of the image at 'data' that fall outside the aligned w x h
 area, running TRANS for each one: first the right-hand strip (uw columns of
 the top h rows), then the bottom strip (w columns of the last uh rows),
 then the bottom-right corner.  During TRANS, pData points at the pixel and
 pTop at the matching slot in the tail of pBuf.
 Note that pData is overwritten whenever uw or uh is non-zero.
*/
#define ZYWRLE_LOAD_UNALIGN(data,TRANS)	\
	pTop = pBuf+w*h;	\
	if (uw) {	\
		pData=         data + w;	\
		pEnd = (int*)(pData+ h*scanline);	\
		while (pData < (PIXEL_T*)pEnd) {	\
			pLine = (int*)(pData + uw);	\
			while (pData < (PIXEL_T*)pLine) {	\
				TRANS	\
				pData++;	\
				pTop++;	\
			}	\
			pData += scanline-uw;	\
		}	\
	}	\
	if (uh) {	\
		pData=         data +  h*scanline;	\
		pEnd = (int*)(pData+ uh*scanline);	\
		while (pData < (PIXEL_T*)pEnd) {	\
			pLine = (int*)(pData + w);	\
			while (pData < (PIXEL_T*)pLine) {	\
				TRANS	\
				pData++;	\
				pTop++;	\
			}	\
			pData += scanline-w;	\
		}	\
	}	\
	if (uw && uh) {	\
		pData=         data + w+ h*scanline;	\
		pEnd = (int*)(pData+   uh*scanline);	\
		while (pData < (PIXEL_T*)pEnd) {	\
			pLine = (int*)(pData + uw);	\
			while (pData < (PIXEL_T*)pLine) {	\
				TRANS	\
				pData++;	\
				pTop++;	\
			}	\
			pData += scanline-uw;	\
		}	\
	}
    700 
    701 static InlineX void zywrleCalcSize(int* pW, int* pH, int level)
    702 {
    703 	*pW &= ~((1<<level)-1);
    704 	*pH &= ~((1<<level)-1);
    705 }
    706 
    707 #endif /* ZYWRLE_ONCE */
    708 
    709 #ifndef CPIXEL
    710 #ifdef ZYWRLE_ENCODE
    711 static InlineX void ZYWRLE_RGBYUV(int* pBuf, PIXEL_T* data, int width, int height, int scanline)
    712 {
    713 	int R, G, B;
    714 	int Y, U, V;
    715 	int* pLine;
    716 	int* pEnd;
    717 	pEnd = pBuf+height*width;
    718 	while (pBuf < pEnd) {
    719 		pLine = pBuf+width;
    720 		while (pBuf < pLine) {
    721 			ZYWRLE_LOAD_PIXEL(data,R,G,B);
    722 			ZYWRLE_RGBYUV1(R,G,B,Y,U,V,ZYWRLE_YMASK,ZYWRLE_UVMASK);
    723 			ZYWRLE_SAVE_COEFF(pBuf,V,Y,U);
    724 			pBuf++;
    725 			data++;
    726 		}
    727 		data += scanline-width;
    728 	}
    729 }
    730 #endif
    731 #ifdef ZYWRLE_DECODE
    732 static InlineX void ZYWRLE_YUVRGB(int* pBuf, PIXEL_T* data, int width, int height, int scanline) {
    733 	int R, G, B;
    734 	int Y, U, V;
    735 	int* pLine;
    736 	int* pEnd;
    737 	pEnd = pBuf+height*width;
    738 	while (pBuf < pEnd) {
    739 		pLine = pBuf+width;
    740 		while (pBuf < pLine) {
    741 			ZYWRLE_LOAD_COEFF(pBuf,V,Y,U);
    742 			ZYWRLE_YUVRGB1(R,G,B,Y,U,V);
    743 			ZYWRLE_SAVE_PIXEL(data,R,G,B);
    744 			pBuf++;
    745 			data++;
    746 		}
    747 		data += scanline-width;
    748 	}
    749 }
    750 #endif
    751 
    752 #ifdef ZYWRLE_ENCODE
    753 PIXEL_T* ZYWRLE_ANALYZE(PIXEL_T* dst, PIXEL_T* src, int w, int h, int scanline, int level, int* pBuf) {
    754 	int l;
    755 	int uw = w;
    756 	int uh = h;
    757 	int* pTop;
    758 	int* pEnd;
    759 	int* pLine;
    760 	PIXEL_T* pData;
    761 	int R, G, B;
    762 	int s;
    763 	int* pH;
    764 
    765 	zywrleCalcSize(&w, &h, level);
    766 	if (w == 0 || h == 0)
    767 		return NULL;
    768 	uw -= w;
    769 	uh -= h;
    770 
    771 	pData = dst;
    772 	ZYWRLE_LOAD_UNALIGN(src,*(PIXEL_T*)pTop=*pData;)
    773 	ZYWRLE_RGBYUV(pBuf, src, w, h, scanline);
    774 	Wavelet(pBuf, w, h, level);
    775 	for (l = 0; l < level; l++) {
    776 		ZYWRLE_PACK_COEFF(pBuf, dst, 3, w, h, scanline, l);
    777 		ZYWRLE_PACK_COEFF(pBuf, dst, 2, w, h, scanline, l);
    778 		ZYWRLE_PACK_COEFF(pBuf, dst, 1, w, h, scanline, l);
    779 		if (l == level - 1) {
    780 			ZYWRLE_PACK_COEFF(pBuf, dst, 0, w, h, scanline, l);
    781 		}
    782 	}
    783 	ZYWRLE_SAVE_UNALIGN(dst,*dst=*(PIXEL_T*)pTop;)
    784 	return dst;
    785 }
    786 #endif
    787 #ifdef ZYWRLE_DECODE
    788 PIXEL_T* ZYWRLE_SYNTHESIZE(PIXEL_T* dst, PIXEL_T* src, int w, int h, int scanline, int level, int* pBuf)
    789 {
    790 	int l;
    791 	int uw = w;
    792 	int uh = h;
    793 	int* pTop;
    794 	int* pEnd;
    795 	int* pLine;
    796 	PIXEL_T* pData;
    797 	int R, G, B;
    798 	int s;
    799 	int* pH;
    800 
    801 	zywrleCalcSize(&w, &h, level);
    802 	if (w == 0 || h == 0)
    803 		return NULL;
    804 	uw -= w;
    805 	uh -= h;
    806 
    807 	pData = src;
    808 	for (l = 0; l < level; l++) {
    809 		ZYWRLE_UNPACK_COEFF(pBuf, src, 3, w, h, scanline, l);
    810 		ZYWRLE_UNPACK_COEFF(pBuf, src, 2, w, h, scanline, l);
    811 		ZYWRLE_UNPACK_COEFF(pBuf, src, 1, w, h, scanline, l);
    812 		if (l == level - 1) {
    813 			ZYWRLE_UNPACK_COEFF(pBuf, src, 0, w, h, scanline, l);
    814 		}
    815 	}
    816 	ZYWRLE_SAVE_UNALIGN(src,*(PIXEL_T*)pTop=*src;)
    817 	InvWavelet(pBuf, w, h, level);
    818 	ZYWRLE_YUVRGB(pBuf, dst, w, h, scanline);
    819 	ZYWRLE_LOAD_UNALIGN(dst,*pData=*(PIXEL_T*)pTop;)
    820 	return src;
    821 }
    822 #endif
    823 #endif  /* CPIXEL */
    824 
/* Drop the per-format aliases defined near the top of this file, so a later
   inclusion can define them again without a redefinition conflict. */
#undef ZYWRLE_RGBYUV
#undef ZYWRLE_YUVRGB
#undef ZYWRLE_LOAD_PIXEL
#undef ZYWRLE_SAVE_PIXEL
    829