1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 #include "mp4def.h" 19 #include "mp4enc_lib.h" 20 #include "mp4lib_int.h" 21 #include "m4venc_oscl.h" 22 23 #define VOP_OFFSET ((lx<<4)+16) /* for offset to image area */ 24 #define CVOP_OFFSET ((lx<<2)+8) 25 26 #define PREF_INTRA 512 /* bias for INTRA coding */ 27 28 /*=============================================================== 29 Function: ChooseMode 30 Date: 09/21/2000 31 Purpose: Choosing between INTRA or INTER 32 Input/Output: Pointer to the starting point of the macroblock. 33 Note: 34 ===============================================================*/ 35 void ChooseMode_C(UChar *Mode, UChar *cur, Int lx, Int min_SAD) 36 { 37 Int i, j; 38 Int MB_mean, A, tmp, Th; 39 Int offset = (lx >> 2) - 4; 40 UChar *p = cur; 41 Int *pint = (Int *) cur, temp = 0; 42 MB_mean = 0; 43 A = 0; 44 Th = (min_SAD - PREF_INTRA) >> 1; 45 46 for (j = 0; j < 8; j++) 47 { 48 49 /* Odd Rows */ 50 temp += (*pint++) & 0x00FF00FF; 51 temp += (*pint++) & 0x00FF00FF; 52 temp += (*pint++) & 0x00FF00FF; 53 temp += (*pint++) & 0x00FF00FF; 54 pint += offset; 55 56 /* Even Rows */ 57 temp += (*pint++ >> 8) & 0x00FF00FF; 58 temp += (*pint++ >> 8) & 0x00FF00FF; 59 temp += (*pint++ >> 8) & 0x00FF00FF; 60 temp += (*pint++ >> 8) & 0x00FF00FF; 61 pint += offset; 62 63 } 64 65 MB_mean = (((temp & 0x0000FFFF)) + ((temp & 0xFFFF0000) >> 16)) >> 7; 66 67 p = cur; 68 offset = lx - 16; 69 for (j = 0; j < 16; j++) 70 { 71 temp = (j & 1); 72 p += temp; 73 i = 8; 74 while (i--) 75 { 76 tmp = *p - MB_mean; 77 p += 2; 78 if (tmp > 0) A += tmp; 79 else A -= tmp; 80 } 81 82 if (A >= Th) 83 { 84 *Mode = MODE_INTER; 85 return ; 86 } 87 p += (offset - temp); 88 } 89 90 if (A < Th) 91 *Mode = MODE_INTRA; 92 else 93 *Mode = MODE_INTER; 94 95 return ; 96 } 97 98 99 /*=============================================================== 100 Function: GetHalfPelMBRegion 101 Date: 09/17/2000 102 Purpose: Interpolate the search region for half-pel search 103 Input/Output: Center of the search, Half-pel memory, width 104 Note: rounding type should be parameterized. 105 Now fixed it to zero!!!!!! 106 107 ===============================================================*/ 108 109 110 void GetHalfPelMBRegion_C(UChar *cand, UChar *hmem, Int lx) 111 { 112 Int i, j; 113 UChar *p1, *p2, *p3, *p4; 114 UChar *hmem1 = hmem; 115 UChar *hmem2 = hmem1 + 33; 116 Int offset = lx - 17; 117 118 p1 = cand - lx - 1; 119 p2 = cand - lx; 120 p3 = cand - 1; 121 p4 = cand; 122 123 for (j = 0; j < 16; j++) 124 { 125 for (i = 0; i < 16; i++) 126 { 127 *hmem1++ = ((*p1++) + *p2 + *p3 + *p4 + 2) >> 2; 128 *hmem1++ = ((*p2++) + *p4 + 1) >> 1; 129 *hmem2++ = ((*p3++) + *p4 + 1) >> 1; 130 *hmem2++ = *p4++; 131 } 132 /* last pixel */ 133 *hmem1++ = ((*p1++) + (*p2++) + *p3 + *p4 + 2) >> 2; 134 *hmem2++ = ((*p3++) + (*p4++) + 1) >> 1; 135 hmem1 += 33; 136 hmem2 += 33; 137 p1 += offset; 138 p2 += offset; 139 p3 += offset; 140 p4 += offset; 141 } 142 /* last row */ 143 for (i = 0; i < 16; i++) 144 { 145 *hmem1++ = ((*p1++) + *p2 + (*p3++) + *p4 + 2) >> 2; 146 *hmem1++ = ((*p2++) + (*p4++) + 1) >> 1; 147 148 } 149 *hmem1 = (*p1 + *p2 + *p3 + *p4 + 2) >> 2; 150 151 return ; 152 } 153 154 /*=============================================================== 155 Function: GetHalfPelBlkRegion 156 Date: 09/20/2000 157 Purpose: Interpolate the search region for half-pel search 158 in 4MV mode. 159 Input/Output: Center of the search, Half-pel memory, width 160 Note: rounding type should be parameterized. 161 Now fixed it to zero!!!!!! 162 163 ===============================================================*/ 164 165 166 void GetHalfPelBlkRegion(UChar *cand, UChar *hmem, Int lx) 167 { 168 Int i, j; 169 UChar *p1, *p2, *p3, *p4; 170 UChar *hmem1 = hmem; 171 UChar *hmem2 = hmem1 + 17; 172 Int offset = lx - 9; 173 174 p1 = cand - lx - 1; 175 p2 = cand - lx; 176 p3 = cand - 1; 177 p4 = cand; 178 179 for (j = 0; j < 8; j++) 180 { 181 for (i = 0; i < 8; i++) 182 { 183 *hmem1++ = ((*p1++) + *p2 + *p3 + *p4 + 2) >> 2; 184 *hmem1++ = ((*p2++) + *p4 + 1) >> 1; 185 *hmem2++ = ((*p3++) + *p4 + 1) >> 1; 186 *hmem2++ = *p4++; 187 } 188 /* last pixel */ 189 *hmem1++ = ((*p1++) + (*p2++) + *p3 + *p4 + 2) >> 2; 190 *hmem2++ = ((*p3++) + (*p4++) + 1) >> 1; 191 hmem1 += 17; 192 hmem2 += 17; 193 p1 += offset; 194 p2 += offset; 195 p3 += offset; 196 p4 += offset; 197 } 198 /* last row */ 199 for (i = 0; i < 8; i++) 200 { 201 *hmem1++ = ((*p1++) + *p2 + (*p3++) + *p4 + 2) >> 2; 202 *hmem1++ = ((*p2++) + (*p4++) + 1) >> 1; 203 204 } 205 *hmem1 = (*p1 + *p2 + *p3 + *p4 + 2) >> 2; 206 207 return ; 208 } 209 210 211 /*===================================================================== 212 Function: PaddingEdge 213 Date: 09/16/2000 214 Purpose: Pad edge of a Vop 215 Modification: 09/20/05. 216 =====================================================================*/ 217 218 void PaddingEdge(Vop *refVop) 219 { 220 UChar *src, *dst; 221 Int i; 222 Int pitch, width, height; 223 ULong temp1, temp2; 224 225 width = refVop->width; 226 height = refVop->height; 227 pitch = refVop->pitch; 228 229 /* pad top */ 230 src = refVop->yChan; 231 232 temp1 = *src; /* top-left corner */ 233 temp2 = src[width-1]; /* top-right corner */ 234 temp1 |= (temp1 << 8); 235 temp1 |= (temp1 << 16); 236 temp2 |= (temp2 << 8); 237 temp2 |= (temp2 << 16); 238 239 dst = src - (pitch << 4); 240 241 *((ULong*)(dst - 16)) = temp1; 242 *((ULong*)(dst - 12)) = temp1; 243 *((ULong*)(dst - 8)) = temp1; 244 *((ULong*)(dst - 4)) = temp1; 245 246 M4VENC_MEMCPY(dst, src, width); 247 248 *((ULong*)(dst += width)) = temp2; 249 *((ULong*)(dst + 4)) = temp2; 250 *((ULong*)(dst + 8)) = temp2; 251 *((ULong*)(dst + 12)) = temp2; 252 253 dst = dst - width - 16; 254 255 i = 15; 256 while (i--) 257 { 258 M4VENC_MEMCPY(dst + pitch, dst, pitch); 259 dst += pitch; 260 } 261 262 /* pad sides */ 263 dst += (pitch + 16); 264 src = dst; 265 i = height; 266 while (i--) 267 { 268 temp1 = *src; 269 temp2 = src[width-1]; 270 temp1 |= (temp1 << 8); 271 temp1 |= (temp1 << 16); 272 temp2 |= (temp2 << 8); 273 temp2 |= (temp2 << 16); 274 275 *((ULong*)(dst - 16)) = temp1; 276 *((ULong*)(dst - 12)) = temp1; 277 *((ULong*)(dst - 8)) = temp1; 278 *((ULong*)(dst - 4)) = temp1; 279 280 *((ULong*)(dst += width)) = temp2; 281 *((ULong*)(dst + 4)) = temp2; 282 *((ULong*)(dst + 8)) = temp2; 283 *((ULong*)(dst + 12)) = temp2; 284 285 src += pitch; 286 dst = src; 287 } 288 289 /* pad bottom */ 290 dst -= 16; 291 i = 16; 292 while (i--) 293 { 294 M4VENC_MEMCPY(dst, dst - pitch, pitch); 295 dst += pitch; 296 } 297 298 299 return ; 300 } 301 302 /*=================================================================== 303 Function: ComputeMBSum 304 Date: 10/28/2000 305 Purpose: Compute sum of absolute value (SAV) of blocks in a macroblock 306 in INTRA mode needed for rate control. Thus, instead of 307 computing the SAV, we can compute first order moment or 308 variance . 309 310 11/28/00: add MMX 311 9/3/01: do parallel comp for C function. 312 ===================================================================*/ 313 void ComputeMBSum_C(UChar *cur, Int lx, MOT *mot_mb) 314 { 315 Int j; 316 Int *cInt, *cInt2; 317 Int sad1 = 0, sad2 = 0, sad3 = 0, sad4 = 0; 318 Int tmp, tmp2, mask = 0x00FF00FF; 319 320 cInt = (Int*)cur; /* make sure this is word-align */ 321 cInt2 = (Int*)(cur + (lx << 3)); 322 j = 8; 323 while (j--) 324 { 325 tmp = cInt[3]; /* load 4 pixels at a time */ 326 tmp2 = tmp & mask; 327 tmp = (tmp >> 8) & mask; 328 tmp += tmp2; 329 sad2 += tmp; 330 tmp = cInt[2]; 331 tmp2 = tmp & mask; 332 tmp = (tmp >> 8) & mask; 333 tmp += tmp2; 334 sad2 += tmp; 335 tmp = cInt[1]; 336 tmp2 = tmp & mask; 337 tmp = (tmp >> 8) & mask; 338 tmp += tmp2; 339 sad1 += tmp; 340 tmp = *cInt; 341 cInt += (lx >> 2); 342 tmp2 = tmp & mask; 343 tmp = (tmp >> 8) & mask; 344 tmp += tmp2; 345 sad1 += tmp; 346 347 tmp = cInt2[3]; 348 tmp2 = tmp & mask; 349 tmp = (tmp >> 8) & mask; 350 tmp += tmp2; 351 sad4 += tmp; 352 tmp = cInt2[2]; 353 tmp2 = tmp & mask; 354 tmp = (tmp >> 8) & mask; 355 tmp += tmp2; 356 sad4 += tmp; 357 tmp = cInt2[1]; 358 tmp2 = tmp & mask; 359 tmp = (tmp >> 8) & mask; 360 tmp += tmp2; 361 sad3 += tmp; 362 tmp = *cInt2; 363 cInt2 += (lx >> 2); 364 tmp2 = tmp & mask; 365 tmp = (tmp >> 8) & mask; 366 tmp += tmp2; 367 sad3 += tmp; 368 } 369 sad1 += (sad1 << 16); 370 sad2 += (sad2 << 16); 371 sad3 += (sad3 << 16); 372 sad4 += (sad4 << 16); 373 sad1 >>= 16; 374 sad2 >>= 16; 375 sad3 >>= 16; 376 sad4 >>= 16; 377 378 mot_mb[1].sad = sad1; 379 mot_mb[2].sad = sad2; 380 mot_mb[3].sad = sad3; 381 mot_mb[4].sad = sad4; 382 mot_mb[0].sad = sad1 + sad2 + sad3 + sad4; 383 384 return ; 385 } 386 387