1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 #include "avclib_common.h" 19 20 /* input are in the first 16 elements of block, 21 output must be in the location specified in Figure 8-6. */ 22 /* subclause 8.5.6 */ 23 void Intra16DCTrans(int16 *block, int Qq, int Rq) 24 { 25 int m0, m1, m2, m3; 26 int j, offset; 27 int16 *inout; 28 int scale = dequant_coefres[Rq][0]; 29 30 inout = block; 31 for (j = 0; j < 4; j++) 32 { 33 m0 = inout[0] + inout[4]; 34 m1 = inout[0] - inout[4]; 35 m2 = inout[8] + inout[12]; 36 m3 = inout[8] - inout[12]; 37 38 39 inout[0] = m0 + m2; 40 inout[4] = m0 - m2; 41 inout[8] = m1 - m3; 42 inout[12] = m1 + m3; 43 inout += 64; 44 } 45 46 inout = block; 47 48 if (Qq >= 2) /* this way should be faster than JM */ 49 { /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */ 50 Qq -= 2; 51 for (j = 0; j < 4; j++) 52 { 53 m0 = inout[0] + inout[64]; 54 m1 = inout[0] - inout[64]; 55 m2 = inout[128] + inout[192]; 56 m3 = inout[128] - inout[192]; 57 58 inout[0] = ((m0 + m2) * scale) << Qq; 59 inout[64] = ((m0 - m2) * scale) << Qq; 60 inout[128] = ((m1 - m3) * scale) << Qq; 61 inout[192] = ((m1 + m3) * scale) << Qq; 62 inout += 4; 63 } 64 } 65 else 66 { 67 Qq = 2 - Qq; 68 offset = 1 << (Qq - 1); 69 70 for (j = 0; j < 4; j++) 71 { 72 m0 = inout[0] + inout[64]; 73 m1 = inout[0] - inout[64]; 74 m2 = inout[128] + inout[192]; 75 m3 = inout[128] - inout[192]; 76 77 inout[0] = (((m0 + m2) * scale + offset) >> Qq); 78 inout[64] = (((m0 - m2) * scale + offset) >> Qq); 79 inout[128] = (((m1 - m3) * scale + offset) >> Qq); 80 inout[192] = (((m1 + m3) * scale + offset) >> Qq); 81 inout += 4; 82 } 83 } 84 85 return ; 86 } 87 88 /* see subclase 8.5.8 */ 89 void itrans(int16 *block, uint8 *pred, uint8 *cur, int width) 90 { 91 int e0, e1, e2, e3; /* note, at every step of the calculation, these values */ 92 /* shall never exceed 16bit sign value, but we don't check */ 93 int i; /* to save the cycles. */ 94 int16 *inout; 95 96 inout = block; 97 98 for (i = 4; i > 0; i--) 99 { 100 e0 = inout[0] + inout[2]; 101 e1 = inout[0] - inout[2]; 102 e2 = (inout[1] >> 1) - inout[3]; 103 e3 = inout[1] + (inout[3] >> 1); 104 105 inout[0] = e0 + e3; 106 inout[1] = e1 + e2; 107 inout[2] = e1 - e2; 108 inout[3] = e0 - e3; 109 110 inout += 16; 111 } 112 113 for (i = 4; i > 0; i--) 114 { 115 e0 = block[0] + block[32]; 116 e1 = block[0] - block[32]; 117 e2 = (block[16] >> 1) - block[48]; 118 e3 = block[16] + (block[48] >> 1); 119 120 e0 += e3; 121 e3 = (e0 - (e3 << 1)); /* e0-e3 */ 122 e1 += e2; 123 e2 = (e1 - (e2 << 1)); /* e1-e2 */ 124 e0 += 32; 125 e1 += 32; 126 e2 += 32; 127 e3 += 32; 128 #ifdef USE_PRED_BLOCK 129 e0 = pred[0] + (e0 >> 6); 130 if ((uint)e0 > 0xFF) e0 = 0xFF & (~(e0 >> 31)); /* clip */ 131 e1 = pred[20] + (e1 >> 6); 132 if ((uint)e1 > 0xFF) e1 = 0xFF & (~(e1 >> 31)); /* clip */ 133 e2 = pred[40] + (e2 >> 6); 134 if ((uint)e2 > 0xFF) e2 = 0xFF & (~(e2 >> 31)); /* clip */ 135 e3 = pred[60] + (e3 >> 6); 136 if ((uint)e3 > 0xFF) e3 = 0xFF & (~(e3 >> 31)); /* clip */ 137 *cur = e0; 138 *(cur += width) = e1; 139 *(cur += width) = e2; 140 cur[width] = e3; 141 cur -= (width << 1); 142 cur++; 143 pred++; 144 #else 145 OSCL_UNUSED_ARG(pred); 146 147 e0 = *cur + (e0 >> 6); 148 if ((uint)e0 > 0xFF) e0 = 0xFF & (~(e0 >> 31)); /* clip */ 149 *cur = e0; 150 e1 = *(cur += width) + (e1 >> 6); 151 if ((uint)e1 > 0xFF) e1 = 0xFF & (~(e1 >> 31)); /* clip */ 152 *cur = e1; 153 e2 = *(cur += width) + (e2 >> 6); 154 if ((uint)e2 > 0xFF) e2 = 0xFF & (~(e2 >> 31)); /* clip */ 155 *cur = e2; 156 e3 = cur[width] + (e3 >> 6); 157 if ((uint)e3 > 0xFF) e3 = 0xFF & (~(e3 >> 31)); /* clip */ 158 cur[width] = e3; 159 cur -= (width << 1); 160 cur++; 161 #endif 162 block++; 163 } 164 165 return ; 166 } 167 168 /* see subclase 8.5.8 */ 169 void ictrans(int16 *block, uint8 *pred, uint8 *cur, int width) 170 { 171 int e0, e1, e2, e3; /* note, at every step of the calculation, these values */ 172 /* shall never exceed 16bit sign value, but we don't check */ 173 int i; /* to save the cycles. */ 174 int16 *inout; 175 176 inout = block; 177 178 for (i = 4; i > 0; i--) 179 { 180 e0 = inout[0] + inout[2]; 181 e1 = inout[0] - inout[2]; 182 e2 = (inout[1] >> 1) - inout[3]; 183 e3 = inout[1] + (inout[3] >> 1); 184 185 inout[0] = e0 + e3; 186 inout[1] = e1 + e2; 187 inout[2] = e1 - e2; 188 inout[3] = e0 - e3; 189 190 inout += 16; 191 } 192 193 for (i = 4; i > 0; i--) 194 { 195 e0 = block[0] + block[32]; 196 e1 = block[0] - block[32]; 197 e2 = (block[16] >> 1) - block[48]; 198 e3 = block[16] + (block[48] >> 1); 199 200 e0 += e3; 201 e3 = (e0 - (e3 << 1)); /* e0-e3 */ 202 e1 += e2; 203 e2 = (e1 - (e2 << 1)); /* e1-e2 */ 204 e0 += 32; 205 e1 += 32; 206 e2 += 32; 207 e3 += 32; 208 #ifdef USE_PRED_BLOCK 209 e0 = pred[0] + (e0 >> 6); 210 if ((uint)e0 > 0xFF) e0 = 0xFF & (~(e0 >> 31)); /* clip */ 211 e1 = pred[12] + (e1 >> 6); 212 if ((uint)e1 > 0xFF) e1 = 0xFF & (~(e1 >> 31)); /* clip */ 213 e2 = pred[24] + (e2 >> 6); 214 if ((uint)e2 > 0xFF) e2 = 0xFF & (~(e2 >> 31)); /* clip */ 215 e3 = pred[36] + (e3 >> 6); 216 if ((uint)e3 > 0xFF) e3 = 0xFF & (~(e3 >> 31)); /* clip */ 217 *cur = e0; 218 *(cur += width) = e1; 219 *(cur += width) = e2; 220 cur[width] = e3; 221 cur -= (width << 1); 222 cur++; 223 pred++; 224 #else 225 OSCL_UNUSED_ARG(pred); 226 227 e0 = *cur + (e0 >> 6); 228 if ((uint)e0 > 0xFF) e0 = 0xFF & (~(e0 >> 31)); /* clip */ 229 *cur = e0; 230 e1 = *(cur += width) + (e1 >> 6); 231 if ((uint)e1 > 0xFF) e1 = 0xFF & (~(e1 >> 31)); /* clip */ 232 *cur = e1; 233 e2 = *(cur += width) + (e2 >> 6); 234 if ((uint)e2 > 0xFF) e2 = 0xFF & (~(e2 >> 31)); /* clip */ 235 *cur = e2; 236 e3 = cur[width] + (e3 >> 6); 237 if ((uint)e3 > 0xFF) e3 = 0xFF & (~(e3 >> 31)); /* clip */ 238 cur[width] = e3; 239 cur -= (width << 1); 240 cur++; 241 #endif 242 block++; 243 } 244 245 return ; 246 } 247 248 /* see subclause 8.5.7 */ 249 void ChromaDCTrans(int16 *block, int Qq, int Rq) 250 { 251 int c00, c01, c10, c11; 252 int f0, f1, f2, f3; 253 int scale = dequant_coefres[Rq][0]; 254 255 c00 = block[0] + block[4]; 256 c01 = block[0] - block[4]; 257 c10 = block[64] + block[68]; 258 c11 = block[64] - block[68]; 259 260 f0 = c00 + c10; 261 f1 = c01 + c11; 262 f2 = c00 - c10; 263 f3 = c01 - c11; 264 265 if (Qq >= 1) 266 { 267 Qq -= 1; 268 block[0] = (f0 * scale) << Qq; 269 block[4] = (f1 * scale) << Qq; 270 block[64] = (f2 * scale) << Qq; 271 block[68] = (f3 * scale) << Qq; 272 } 273 else 274 { 275 block[0] = (f0 * scale) >> 1; 276 block[4] = (f1 * scale) >> 1; 277 block[64] = (f2 * scale) >> 1; 278 block[68] = (f3 * scale) >> 1; 279 } 280 281 return ; 282 } 283 284 285 void copy_block(uint8 *pred, uint8 *cur, int width, int pred_pitch) 286 { 287 uint32 temp; 288 289 temp = *((uint32*)pred); 290 pred += pred_pitch; 291 *((uint32*)cur) = temp; 292 cur += width; 293 temp = *((uint32*)pred); 294 pred += pred_pitch; 295 *((uint32*)cur) = temp; 296 cur += width; 297 temp = *((uint32*)pred); 298 pred += pred_pitch; 299 *((uint32*)cur) = temp; 300 cur += width; 301 temp = *((uint32*)pred); 302 *((uint32*)cur) = temp; 303 304 return ; 305 } 306 307 308