Home | History | Annotate | Download | only in opencl
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
     14 // Third party copyrights are property of their respective owners.
     15 //
     16 // Redistribution and use in source and binary forms, with or without modification,
     17 // are permitted provided that the following conditions are met:
     18 //
     19 //   * Redistribution's of source code must retain the above copyright notice,
     20 //     this list of conditions and the following disclaimer.
     21 //
     22 //   * Redistribution's in binary form must reproduce the above copyright notice,
     23 //     this list of conditions and the following disclaimer in the documentation
     24 //     and/or other materials provided with the distribution.
     25 //
     26 //   * The name of the copyright holders may not be used to endorse or promote products
     27 //     derived from this software without specific prior written permission.
     28 //
     29 // This software is provided by the copyright holders and contributors "as is" and
     30 // any express or implied warranties, including, but not limited to, the implied
     31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
     33 // indirect, incidental, special, exemplary, or consequential damages
     34 // (including, but not limited to, procurement of substitute goods or services;
     35 // loss of use, data, or profits; or business interruption) however caused
     36 // and on any theory of liability, whether in contract, strict liability,
     37 // or tort (including negligence or otherwise) arising in any way out of
     38 // the use of this software, even if advised of the possibility of such damage.
     39 //
     40 //M*/
     41 
     42 #if kercn != 3
     43 #define loadpix(addr) *(__global const T *)(addr)
     44 #define storepix(val, addr)  *(__global T *)(addr) = val
     45 #define TSIZE (int)sizeof(T)
     46 #else
     47 #define loadpix(addr) vload3(0, (__global const T1 *)(addr))
     48 #define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
     49 #define TSIZE ((int)sizeof(T1)*3)
     50 #endif
     51 
     52 __kernel void arithm_flip_rows(__global const uchar * srcptr, int src_step, int src_offset,
     53                                __global uchar * dstptr, int dst_step, int dst_offset,
     54                                int rows, int cols, int thread_rows, int thread_cols)
     55 {
     56     int x = get_global_id(0);
     57     int y0 = get_global_id(1) * PIX_PER_WI_Y;
     58 
     59     if (x < cols)
     60     {
     61         int src_index0 = mad24(y0, src_step, mad24(x, TSIZE, src_offset));
     62         int src_index1 = mad24(rows - y0 - 1, src_step, mad24(x, TSIZE, src_offset));
     63         int dst_index0 = mad24(y0, dst_step, mad24(x, TSIZE, dst_offset));
     64         int dst_index1 = mad24(rows - y0 - 1, dst_step, mad24(x, TSIZE, dst_offset));
     65 
     66         #pragma unroll
     67         for (int y = y0, y1 = min(thread_rows, y0 + PIX_PER_WI_Y); y < y1; ++y)
     68         {
     69             T src0 = loadpix(srcptr + src_index0);
     70             T src1 = loadpix(srcptr + src_index1);
     71 
     72             storepix(src1, dstptr + dst_index0);
     73             storepix(src0, dstptr + dst_index1);
     74 
     75             src_index0 += src_step;
     76             src_index1 -= src_step;
     77             dst_index0 += dst_step;
     78             dst_index1 -= dst_step;
     79         }
     80     }
     81 }
     82 
     83 __kernel void arithm_flip_rows_cols(__global const uchar * srcptr, int src_step, int src_offset,
     84                                     __global uchar * dstptr, int dst_step, int dst_offset,
     85                                     int rows, int cols, int thread_rows, int thread_cols)
     86 {
     87     int x = get_global_id(0);
     88     int y0 = get_global_id(1)*PIX_PER_WI_Y;
     89 
     90     if (x < cols)
     91     {
     92         int src_index0 = mad24(y0, src_step, mad24(x, TSIZE, src_offset));
     93         int src_index1 = mad24(rows - y0 - 1, src_step, mad24(cols - x - 1, TSIZE, src_offset));
     94         int dst_index0 = mad24(y0, dst_step, mad24(x, TSIZE, dst_offset));
     95         int dst_index1 = mad24(rows - y0 - 1, dst_step, mad24(cols - x - 1, TSIZE, dst_offset));
     96 
     97         #pragma unroll
     98         for (int y = y0, y1 = min(thread_rows, y0 + PIX_PER_WI_Y); y < y1; ++y)
     99         {
    100             T src0 = loadpix(srcptr + src_index0);
    101             T src1 = loadpix(srcptr + src_index1);
    102 
    103 #if kercn == 2
    104 #if cn == 1
    105             src0 = src0.s10;
    106             src1 = src1.s10;
    107 #endif
    108 #elif kercn == 4
    109 #if cn == 1
    110             src0 = src0.s3210;
    111             src1 = src1.s3210;
    112 #elif cn == 2
    113             src0 = src0.s2301;
    114             src1 = src1.s2301;
    115 #endif
    116 #endif
    117 
    118             storepix(src1, dstptr + dst_index0);
    119             storepix(src0, dstptr + dst_index1);
    120 
    121             src_index0 += src_step;
    122             src_index1 -= src_step;
    123             dst_index0 += dst_step;
    124             dst_index1 -= dst_step;
    125         }
    126     }
    127 }
    128 
    129 __kernel void arithm_flip_cols(__global const uchar * srcptr, int src_step, int src_offset,
    130                                __global uchar * dstptr, int dst_step, int dst_offset,
    131                                int rows, int cols, int thread_rows, int thread_cols)
    132 {
    133     int x = get_global_id(0);
    134     int y0 = get_global_id(1)*PIX_PER_WI_Y;
    135 
    136     if (x < thread_cols)
    137     {
    138         int src_index0 = mad24(y0, src_step, mad24(x, TSIZE, src_offset));
    139         int src_index1 = mad24(y0, src_step, mad24(cols - x - 1, TSIZE, src_offset));
    140         int dst_index0 = mad24(y0, dst_step, mad24(x, TSIZE, dst_offset));
    141         int dst_index1 = mad24(y0, dst_step, mad24(cols - x - 1, TSIZE, dst_offset));
    142 
    143         #pragma unroll
    144         for (int y = y0, y1 = min(rows, y0 + PIX_PER_WI_Y); y < y1; ++y)
    145         {
    146             T src0 = loadpix(srcptr + src_index0);
    147             T src1 = loadpix(srcptr + src_index1);
    148 
    149 #if kercn == 2
    150 #if cn == 1
    151             src0 = src0.s10;
    152             src1 = src1.s10;
    153 #endif
    154 #elif kercn == 4
    155 #if cn == 1
    156             src0 = src0.s3210;
    157             src1 = src1.s3210;
    158 #elif cn == 2
    159             src0 = src0.s2301;
    160             src1 = src1.s2301;
    161 #endif
    162 #endif
    163 
    164             storepix(src1, dstptr + dst_index0);
    165             storepix(src0, dstptr + dst_index1);
    166 
    167             src_index0 += src_step;
    168             src_index1 += src_step;
    169             dst_index0 += dst_step;
    170             dst_index1 += dst_step;
    171         }
    172     }
    173 }
    174