Home | History | Annotate | Download | only in cuda
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 /*
     44  * NV12ToARGB color space conversion CUDA kernel
     45  *
     46  * This sample uses CUDA to perform a simple NV12 (YUV 4:2:0 planar)
     47  * source and converts to output in ARGB format
     48  */
     49 
     50 #include "opencv2/opencv_modules.hpp"
     51 
     52 #ifndef HAVE_OPENCV_CUDEV
     53 
     54 #error "opencv_cudev is required"
     55 
     56 #else
     57 
     58 #include "opencv2/cudev/common.hpp"
     59 
     60 using namespace cv;
     61 using namespace cv::cudev;
     62 
     63 void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height);
     64 
     65 namespace
     66 {
     67     __constant__ float constHueColorSpaceMat[9] = {1.1644f, 0.0f, 1.596f, 1.1644f, -0.3918f, -0.813f, 1.1644f, 2.0172f, 0.0f};
     68 
     69     __device__ static void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
     70     {
     71         float luma, chromaCb, chromaCr;
     72 
     73         // Prepare for hue adjustment
     74         luma     = (float)yuvi[0];
     75         chromaCb = (float)((int)yuvi[1] - 512.0f);
     76         chromaCr = (float)((int)yuvi[2] - 512.0f);
     77 
     78        // Convert YUV To RGB with hue adjustment
     79        *red   = (luma     * constHueColorSpaceMat[0]) +
     80                 (chromaCb * constHueColorSpaceMat[1]) +
     81                 (chromaCr * constHueColorSpaceMat[2]);
     82 
     83        *green = (luma     * constHueColorSpaceMat[3]) +
     84                 (chromaCb * constHueColorSpaceMat[4]) +
     85                 (chromaCr * constHueColorSpaceMat[5]);
     86 
     87        *blue  = (luma     * constHueColorSpaceMat[6]) +
     88                 (chromaCb * constHueColorSpaceMat[7]) +
     89                 (chromaCr * constHueColorSpaceMat[8]);
     90     }
     91 
     92     __device__ static uint RGBA_pack_10bit(float red, float green, float blue, uint alpha)
     93     {
     94         uint ARGBpixel = 0;
     95 
     96         // Clamp final 10 bit results
     97         red   = ::fmin(::fmax(red,   0.0f), 1023.f);
     98         green = ::fmin(::fmax(green, 0.0f), 1023.f);
     99         blue  = ::fmin(::fmax(blue,  0.0f), 1023.f);
    100 
    101         // Convert to 8 bit unsigned integers per color component
    102         ARGBpixel = (((uint)blue  >> 2) |
    103                     (((uint)green >> 2) << 8)  |
    104                     (((uint)red   >> 2) << 16) |
    105                     (uint)alpha);
    106 
    107         return ARGBpixel;
    108     }
    109 
    110     // CUDA kernel for outputing the final ARGB output from NV12
    111 
    112     #define COLOR_COMPONENT_BIT_SIZE 10
    113     #define COLOR_COMPONENT_MASK     0x3FF
    114 
    115     __global__ void NV12_to_RGB(const uchar* srcImage, size_t nSourcePitch,
    116                                   uint* dstImage, size_t nDestPitch,
    117                                   uint width, uint height)
    118     {
    119         // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread
    120         const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
    121         const int y = blockIdx.y *  blockDim.y       +  threadIdx.y;
    122 
    123         if (x >= width || y >= height)
    124             return;
    125 
    126         // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way.
    127         // if we move to texture we could read 4 luminance values
    128 
    129         uint yuv101010Pel[2];
    130 
    131         yuv101010Pel[0] = (srcImage[y * nSourcePitch + x    ]) << 2;
    132         yuv101010Pel[1] = (srcImage[y * nSourcePitch + x + 1]) << 2;
    133 
    134         const size_t chromaOffset = nSourcePitch * height;
    135 
    136         const int y_chroma = y >> 1;
    137 
    138         if (y & 1)  // odd scanline ?
    139         {
    140             uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x    ];
    141             uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1];
    142 
    143             if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
    144             {
    145                 chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x    ] + 1) >> 1;
    146                 chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1;
    147             }
    148 
    149             yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
    150             yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
    151 
    152             yuv101010Pel[1] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
    153             yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
    154         }
    155         else
    156         {
    157             yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
    158             yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
    159 
    160             yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
    161             yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
    162         }
    163 
    164         // this steps performs the color conversion
    165         uint yuvi[6];
    166         float red[2], green[2], blue[2];
    167 
    168         yuvi[0] =  (yuv101010Pel[0] &   COLOR_COMPONENT_MASK    );
    169         yuvi[1] = ((yuv101010Pel[0] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
    170         yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
    171 
    172         yuvi[3] =  (yuv101010Pel[1] &   COLOR_COMPONENT_MASK    );
    173         yuvi[4] = ((yuv101010Pel[1] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
    174         yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
    175 
    176         // YUV to RGB Transformation conversion
    177         YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]);
    178         YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]);
    179 
    180         // Clamp the results to RGBA
    181 
    182         const size_t dstImagePitch = nDestPitch >> 2;
    183 
    184         dstImage[y * dstImagePitch + x     ] = RGBA_pack_10bit(red[0], green[0], blue[0], ((uint)0xff << 24));
    185         dstImage[y * dstImagePitch + x + 1 ] = RGBA_pack_10bit(red[1], green[1], blue[1], ((uint)0xff << 24));
    186     }
    187 }
    188 
    189 void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height)
    190 {
    191     // Final Stage: NV12toARGB color space conversion
    192 
    193     _outFrame.create(height, width, CV_8UC4);
    194     GpuMat outFrame = _outFrame.getGpuMat();
    195 
    196     dim3 block(32, 8);
    197     dim3 grid(divUp(width, 2 * block.x), divUp(height, block.y));
    198 
    199     NV12_to_RGB<<<grid, block>>>(decodedFrame.ptr<uchar>(), decodedFrame.step,
    200                                  outFrame.ptr<uint>(), outFrame.step,
    201                                  width, height);
    202 
    203     CV_CUDEV_SAFE_CALL( cudaGetLastError() );
    204     CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    205 }
    206 
    207 #endif
    208