1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 /* 44 * NV12ToARGB color space conversion CUDA kernel 45 * 46 * This sample uses CUDA to perform a simple NV12 (YUV 4:2:0 planar) 47 * source and converts to output in ARGB format 48 */ 49 50 #include "opencv2/opencv_modules.hpp" 51 52 #ifndef HAVE_OPENCV_CUDEV 53 54 #error "opencv_cudev is required" 55 56 #else 57 58 #include "opencv2/cudev/common.hpp" 59 60 using namespace cv; 61 using namespace cv::cudev; 62 63 void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height); 64 65 namespace 66 { 67 __constant__ float constHueColorSpaceMat[9] = {1.1644f, 0.0f, 1.596f, 1.1644f, -0.3918f, -0.813f, 1.1644f, 2.0172f, 0.0f}; 68 69 __device__ static void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue) 70 { 71 float luma, chromaCb, chromaCr; 72 73 // Prepare for hue adjustment 74 luma = (float)yuvi[0]; 75 chromaCb = (float)((int)yuvi[1] - 512.0f); 76 chromaCr = (float)((int)yuvi[2] - 512.0f); 77 78 // Convert YUV To RGB with hue adjustment 79 *red = (luma * constHueColorSpaceMat[0]) + 80 (chromaCb * constHueColorSpaceMat[1]) + 81 (chromaCr * constHueColorSpaceMat[2]); 82 83 *green = (luma * constHueColorSpaceMat[3]) + 84 (chromaCb * constHueColorSpaceMat[4]) + 85 (chromaCr * constHueColorSpaceMat[5]); 86 87 *blue = (luma * constHueColorSpaceMat[6]) + 88 (chromaCb * constHueColorSpaceMat[7]) + 89 (chromaCr * constHueColorSpaceMat[8]); 90 } 91 92 __device__ static uint RGBA_pack_10bit(float red, float green, float blue, uint alpha) 93 { 94 uint ARGBpixel = 0; 95 96 // Clamp final 10 bit results 97 red = ::fmin(::fmax(red, 0.0f), 1023.f); 98 green = ::fmin(::fmax(green, 0.0f), 1023.f); 99 blue = ::fmin(::fmax(blue, 0.0f), 1023.f); 100 101 // Convert to 8 bit unsigned integers per color component 102 ARGBpixel = (((uint)blue >> 2) | 103 (((uint)green >> 2) << 8) | 104 (((uint)red >> 2) << 16) | 105 (uint)alpha); 106 107 return ARGBpixel; 108 } 109 110 // CUDA kernel for outputing the final ARGB output from NV12 111 112 #define COLOR_COMPONENT_BIT_SIZE 10 113 #define COLOR_COMPONENT_MASK 0x3FF 114 115 __global__ void NV12_to_RGB(const uchar* srcImage, size_t nSourcePitch, 116 uint* dstImage, size_t nDestPitch, 117 uint width, uint height) 118 { 119 // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread 120 const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); 121 const int y = blockIdx.y * blockDim.y + threadIdx.y; 122 123 if (x >= width || y >= height) 124 return; 125 126 // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way. 127 // if we move to texture we could read 4 luminance values 128 129 uint yuv101010Pel[2]; 130 131 yuv101010Pel[0] = (srcImage[y * nSourcePitch + x ]) << 2; 132 yuv101010Pel[1] = (srcImage[y * nSourcePitch + x + 1]) << 2; 133 134 const size_t chromaOffset = nSourcePitch * height; 135 136 const int y_chroma = y >> 1; 137 138 if (y & 1) // odd scanline ? 139 { 140 uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x ]; 141 uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1]; 142 143 if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically 144 { 145 chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x ] + 1) >> 1; 146 chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1; 147 } 148 149 yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE + 2)); 150 yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); 151 152 yuv101010Pel[1] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE + 2)); 153 yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); 154 } 155 else 156 { 157 yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x ] << ( COLOR_COMPONENT_BIT_SIZE + 2)); 158 yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); 159 160 yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x ] << ( COLOR_COMPONENT_BIT_SIZE + 2)); 161 yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); 162 } 163 164 // this steps performs the color conversion 165 uint yuvi[6]; 166 float red[2], green[2], blue[2]; 167 168 yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK ); 169 yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); 170 yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); 171 172 yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK ); 173 yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); 174 yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); 175 176 // YUV to RGB Transformation conversion 177 YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]); 178 YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]); 179 180 // Clamp the results to RGBA 181 182 const size_t dstImagePitch = nDestPitch >> 2; 183 184 dstImage[y * dstImagePitch + x ] = RGBA_pack_10bit(red[0], green[0], blue[0], ((uint)0xff << 24)); 185 dstImage[y * dstImagePitch + x + 1 ] = RGBA_pack_10bit(red[1], green[1], blue[1], ((uint)0xff << 24)); 186 } 187 } 188 189 void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height) 190 { 191 // Final Stage: NV12toARGB color space conversion 192 193 _outFrame.create(height, width, CV_8UC4); 194 GpuMat outFrame = _outFrame.getGpuMat(); 195 196 dim3 block(32, 8); 197 dim3 grid(divUp(width, 2 * block.x), divUp(height, block.y)); 198 199 NV12_to_RGB<<<grid, block>>>(decodedFrame.ptr<uchar>(), decodedFrame.step, 200 outFrame.ptr<uint>(), outFrame.step, 201 width, height); 202 203 CV_CUDEV_SAFE_CALL( cudaGetLastError() ); 204 CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); 205 } 206 207 #endif 208