1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /*------------------------------------------------------------------------------ 18 19 Table of contents 20 21 1. Include headers 22 2. External compiler flags 23 3. Module defines 24 4. Local function prototypes 25 5. Functions 26 h264bsdWriteMacroblock 27 h264bsdWriteOutputBlocks 28 29 ------------------------------------------------------------------------------*/ 30 31 /*------------------------------------------------------------------------------ 32 1. Include headers 33 ------------------------------------------------------------------------------*/ 34 35 #include "h264bsd_image.h" 36 #include "h264bsd_util.h" 37 #include "h264bsd_neighbour.h" 38 39 /*------------------------------------------------------------------------------ 40 2. External compiler flags 41 -------------------------------------------------------------------------------- 42 43 -------------------------------------------------------------------------------- 44 3. Module defines 45 ------------------------------------------------------------------------------*/ 46 47 /* x- and y-coordinates for each block, defined in h264bsd_intra_prediction.c */ 48 extern const u32 h264bsdBlockX[]; 49 extern const u32 h264bsdBlockY[]; 50 51 /* clipping table, defined in h264bsd_intra_prediction.c */ 52 extern const u8 h264bsdClip[]; 53 54 /*------------------------------------------------------------------------------ 55 4. Local function prototypes 56 ------------------------------------------------------------------------------*/ 57 58 59 60 /*------------------------------------------------------------------------------ 61 62 Function: h264bsdWriteMacroblock 63 64 Functional description: 65 Write one macroblock into the image. Both luma and chroma 66 components will be written at the same time. 67 68 Inputs: 69 data pointer to macroblock data to be written, 256 values for 70 luma followed by 64 values for both chroma components 71 72 Outputs: 73 image pointer to the image where the macroblock will be written 74 75 Returns: 76 none 77 78 ------------------------------------------------------------------------------*/ 79 #ifndef H264DEC_NEON 80 void h264bsdWriteMacroblock(image_t *image, u8 *data) 81 { 82 83 /* Variables */ 84 85 u32 i; 86 u32 width; 87 u32 *lum, *cb, *cr; 88 u32 *ptr; 89 u32 tmp1, tmp2; 90 91 /* Code */ 92 93 ASSERT(image); 94 ASSERT(data); 95 ASSERT(!((u32)data&0x3)); 96 97 width = image->width; 98 99 /*lint -save -e826 lum, cb and cr used to copy 4 bytes at the time, disable 100 * "area too small" info message */ 101 lum = (u32*)image->luma; 102 cb = (u32*)image->cb; 103 cr = (u32*)image->cr; 104 ASSERT(!((u32)lum&0x3)); 105 ASSERT(!((u32)cb&0x3)); 106 ASSERT(!((u32)cr&0x3)); 107 108 ptr = (u32*)data; 109 110 width *= 4; 111 for (i = 16; i ; i--) 112 { 113 tmp1 = *ptr++; 114 tmp2 = *ptr++; 115 *lum++ = tmp1; 116 *lum++ = tmp2; 117 tmp1 = *ptr++; 118 tmp2 = *ptr++; 119 *lum++ = tmp1; 120 *lum++ = tmp2; 121 lum += width-4; 122 } 123 124 width >>= 1; 125 for (i = 8; i ; i--) 126 { 127 tmp1 = *ptr++; 128 tmp2 = *ptr++; 129 *cb++ = tmp1; 130 *cb++ = tmp2; 131 cb += width-2; 132 } 133 134 for (i = 8; i ; i--) 135 { 136 tmp1 = *ptr++; 137 tmp2 = *ptr++; 138 *cr++ = tmp1; 139 *cr++ = tmp2; 140 cr += width-2; 141 } 142 143 } 144 #endif 145 #ifndef H264DEC_OMXDL 146 /*------------------------------------------------------------------------------ 147 148 Function: h264bsdWriteOutputBlocks 149 150 Functional description: 151 Write one macroblock into the image. Prediction for the macroblock 152 and the residual are given separately and will be combined while 153 writing the data to the image 154 155 Inputs: 156 data pointer to macroblock prediction data, 256 values for 157 luma followed by 64 values for both chroma components 158 mbNum number of the macroblock 159 residual pointer to residual data, 16 16-element arrays for luma 160 followed by 4 16-element arrays for both chroma 161 components 162 163 Outputs: 164 image pointer to the image where the data will be written 165 166 Returns: 167 none 168 169 ------------------------------------------------------------------------------*/ 170 171 void h264bsdWriteOutputBlocks(image_t *image, u32 mbNum, u8 *data, 172 i32 residual[][16]) 173 { 174 175 /* Variables */ 176 177 u32 i; 178 u32 picWidth, picSize; 179 u8 *lum, *cb, *cr; 180 u8 *imageBlock; 181 u8 *tmp; 182 u32 row, col; 183 u32 block; 184 u32 x, y; 185 i32 *pRes; 186 i32 tmp1, tmp2, tmp3, tmp4; 187 const u8 *clp = h264bsdClip + 512; 188 189 /* Code */ 190 191 ASSERT(image); 192 ASSERT(data); 193 ASSERT(mbNum < image->width * image->height); 194 ASSERT(!((u32)data&0x3)); 195 196 /* Image size in macroblocks */ 197 picWidth = image->width; 198 picSize = picWidth * image->height; 199 row = mbNum / picWidth; 200 col = mbNum % picWidth; 201 202 /* Output macroblock position in output picture */ 203 lum = (image->data + row * picWidth * 256 + col * 16); 204 cb = (image->data + picSize * 256 + row * picWidth * 64 + col * 8); 205 cr = (cb + picSize * 64); 206 207 picWidth *= 16; 208 209 for (block = 0; block < 16; block++) 210 { 211 x = h264bsdBlockX[block]; 212 y = h264bsdBlockY[block]; 213 214 pRes = residual[block]; 215 216 ASSERT(pRes); 217 218 tmp = data + y*16 + x; 219 imageBlock = lum + y*picWidth + x; 220 221 ASSERT(!((u32)tmp&0x3)); 222 ASSERT(!((u32)imageBlock&0x3)); 223 224 if (IS_RESIDUAL_EMPTY(pRes)) 225 { 226 /*lint -e826 */ 227 i32 *in32 = (i32*)tmp; 228 i32 *out32 = (i32*)imageBlock; 229 230 /* Residual is zero => copy prediction block to output */ 231 tmp1 = *in32; in32 += 4; 232 tmp2 = *in32; in32 += 4; 233 *out32 = tmp1; out32 += picWidth/4; 234 *out32 = tmp2; out32 += picWidth/4; 235 tmp1 = *in32; in32 += 4; 236 tmp2 = *in32; 237 *out32 = tmp1; out32 += picWidth/4; 238 *out32 = tmp2; 239 } 240 else 241 { 242 243 RANGE_CHECK_ARRAY(pRes, -512, 511, 16); 244 245 /* Calculate image = prediction + residual 246 * Process four pixels in a loop */ 247 for (i = 4; i; i--) 248 { 249 tmp1 = tmp[0]; 250 tmp2 = *pRes++; 251 tmp3 = tmp[1]; 252 tmp1 = clp[tmp1 + tmp2]; 253 tmp4 = *pRes++; 254 imageBlock[0] = (u8)tmp1; 255 tmp3 = clp[tmp3 + tmp4]; 256 tmp1 = tmp[2]; 257 tmp2 = *pRes++; 258 imageBlock[1] = (u8)tmp3; 259 tmp1 = clp[tmp1 + tmp2]; 260 tmp3 = tmp[3]; 261 tmp4 = *pRes++; 262 imageBlock[2] = (u8)tmp1; 263 tmp3 = clp[tmp3 + tmp4]; 264 tmp += 16; 265 imageBlock[3] = (u8)tmp3; 266 imageBlock += picWidth; 267 } 268 } 269 270 } 271 272 picWidth /= 2; 273 274 for (block = 16; block <= 23; block++) 275 { 276 x = h264bsdBlockX[block & 0x3]; 277 y = h264bsdBlockY[block & 0x3]; 278 279 pRes = residual[block]; 280 281 ASSERT(pRes); 282 283 tmp = data + 256; 284 imageBlock = cb; 285 286 if (block >= 20) 287 { 288 imageBlock = cr; 289 tmp += 64; 290 } 291 292 tmp += y*8 + x; 293 imageBlock += y*picWidth + x; 294 295 ASSERT(!((u32)tmp&0x3)); 296 ASSERT(!((u32)imageBlock&0x3)); 297 298 if (IS_RESIDUAL_EMPTY(pRes)) 299 { 300 /*lint -e826 */ 301 i32 *in32 = (i32*)tmp; 302 i32 *out32 = (i32*)imageBlock; 303 304 /* Residual is zero => copy prediction block to output */ 305 tmp1 = *in32; in32 += 2; 306 tmp2 = *in32; in32 += 2; 307 *out32 = tmp1; out32 += picWidth/4; 308 *out32 = tmp2; out32 += picWidth/4; 309 tmp1 = *in32; in32 += 2; 310 tmp2 = *in32; 311 *out32 = tmp1; out32 += picWidth/4; 312 *out32 = tmp2; 313 } 314 else 315 { 316 317 RANGE_CHECK_ARRAY(pRes, -512, 511, 16); 318 319 for (i = 4; i; i--) 320 { 321 tmp1 = tmp[0]; 322 tmp2 = *pRes++; 323 tmp3 = tmp[1]; 324 tmp1 = clp[tmp1 + tmp2]; 325 tmp4 = *pRes++; 326 imageBlock[0] = (u8)tmp1; 327 tmp3 = clp[tmp3 + tmp4]; 328 tmp1 = tmp[2]; 329 tmp2 = *pRes++; 330 imageBlock[1] = (u8)tmp3; 331 tmp1 = clp[tmp1 + tmp2]; 332 tmp3 = tmp[3]; 333 tmp4 = *pRes++; 334 imageBlock[2] = (u8)tmp1; 335 tmp3 = clp[tmp3 + tmp4]; 336 tmp += 8; 337 imageBlock[3] = (u8)tmp3; 338 imageBlock += picWidth; 339 } 340 } 341 } 342 343 } 344 #endif /* H264DEC_OMXDL */ 345 346