Home | History | Annotate | Download | only in MagickCore
      1 /*
      2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      3 %                                                                             %
      4 %                                                                             %
      5 %                                                                             %
      6 %     AAA     CCCC    CCCC  EEEEE  L      EEEEE  RRRR    AAA   TTTTT  EEEEE   %
      7 %    A   A   C       C      E      L      E      R   R  A   A    T    E       %
      8 %    AAAAA   C       C      EEE    L      EEE    RRRR   AAAAA    T    EEE     %
      9 %    A   A   C       C      E      L      E      R R    A   A    T    E       %
     10 %    A   A    CCCC    CCCC  EEEEE  LLLLL  EEEEE  R  R   A   A    T    EEEEE   %
     11 %                                                                             %
     12 %                                                                             %
     13 %                       MagickCore Acceleration Methods                       %
     14 %                                                                             %
     15 %                              Software Design                                %
     16 %                                  Cristy                                     %
     17 %                               SiuChi Chan                                   %
     18 %                              Guansong Zhang                                 %
     19 %                               January 2010                                  %
     20 %                               Dirk Lemstra                                  %
     21 %                                April 2016                                   %
     22 %                                                                             %
     23 %                                                                             %
     24 %  Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization      %
     25 %  dedicated to making software imaging solutions freely available.           %
     26 %                                                                             %
     27 %  You may not use this file except in compliance with the License.  You may  %
     28 %  obtain a copy of the License at                                            %
     29 %                                                                             %
     30 %    http://www.imagemagick.org/script/license.php                            %
     31 %                                                                             %
     32 %  Unless required by applicable law or agreed to in writing, software        %
     33 %  distributed under the License is distributed on an "AS IS" BASIS,          %
     34 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
     35 %  See the License for the specific language governing permissions and        %
     36 %  limitations under the License.                                             %
     37 %                                                                             %
     38 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     39 */
     40 
     41 /*
     42 Include declarations.
     43 */
     44 #include "MagickCore/studio.h"
     45 #include "MagickCore/accelerate-private.h"
     46 #include "MagickCore/accelerate-kernels-private.h"
     47 #include "MagickCore/artifact.h"
     48 #include "MagickCore/cache.h"
     49 #include "MagickCore/cache-private.h"
     50 #include "MagickCore/cache-view.h"
     51 #include "MagickCore/color-private.h"
     52 #include "MagickCore/delegate-private.h"
     53 #include "MagickCore/enhance.h"
     54 #include "MagickCore/exception.h"
     55 #include "MagickCore/exception-private.h"
     56 #include "MagickCore/gem.h"
     57 #include "MagickCore/image.h"
     58 #include "MagickCore/image-private.h"
     59 #include "MagickCore/linked-list.h"
     60 #include "MagickCore/list.h"
     61 #include "MagickCore/memory_.h"
     62 #include "MagickCore/monitor-private.h"
     63 #include "MagickCore/opencl.h"
     64 #include "MagickCore/opencl-private.h"
     65 #include "MagickCore/option.h"
     66 #include "MagickCore/pixel-accessor.h"
     67 #include "MagickCore/pixel-private.h"
     68 #include "MagickCore/prepress.h"
     69 #include "MagickCore/quantize.h"
     70 #include "MagickCore/quantum-private.h"
     71 #include "MagickCore/random_.h"
     72 #include "MagickCore/random-private.h"
     73 #include "MagickCore/registry.h"
     74 #include "MagickCore/resize.h"
     75 #include "MagickCore/resize-private.h"
     76 #include "MagickCore/semaphore.h"
     77 #include "MagickCore/splay-tree.h"
     78 #include "MagickCore/statistic.h"
     79 #include "MagickCore/string_.h"
     80 #include "MagickCore/string-private.h"
     81 #include "MagickCore/token.h"
     82 
     83 #define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
     84 #define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
     85 
     86 #if defined(MAGICKCORE_OPENCL_SUPPORT)
     87 
     88 /*
     89   Define declarations.
     90 */
     91 #define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
     92 
     93 /*
     94   Static declarations.
     95 */
     96 static const ResizeWeightingFunctionType supportedResizeWeighting[] =
     97 {
     98   BoxWeightingFunction,
     99   TriangleWeightingFunction,
    100   HannWeightingFunction,
    101   HammingWeightingFunction,
    102   BlackmanWeightingFunction,
    103   CubicBCWeightingFunction,
    104   SincWeightingFunction,
    105   SincFastWeightingFunction,
    106   LastWeightingFunction
    107 };
    108 
    109 /*
    110   Helper functions.
    111 */
    112 static MagickBooleanType checkAccelerateCondition(const Image* image)
    113 {
    114   /* check if the image's colorspace is supported */
    115   if (image->colorspace != RGBColorspace &&
    116       image->colorspace != sRGBColorspace &&
    117       image->colorspace != GRAYColorspace)
    118     return(MagickFalse);
    119 
    120   /* check if the virtual pixel method is compatible with the OpenCL implementation */
    121   if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
    122       (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
    123     return(MagickFalse);
    124 
    125   /* check if the image has read / write mask */
    126   if (image->read_mask != MagickFalse || image->write_mask != MagickFalse)
    127     return(MagickFalse);
    128 
    129   if (image->number_channels > 4)
    130     return(MagickFalse);
    131 
    132   /* check if pixel order is R */
    133   if (GetPixelChannelOffset(image,RedPixelChannel) != 0)
    134     return(MagickFalse);
    135 
    136   if (image->number_channels == 1)
    137     return(MagickTrue);
    138 
    139   /* check if pixel order is RA */
    140   if ((image->number_channels == 2) &&
    141       (GetPixelChannelOffset(image,AlphaPixelChannel) == 1))
    142     return(MagickTrue);
    143 
    144   if (image->number_channels == 2)
    145     return(MagickFalse);
    146 
    147   /* check if pixel order is RGB */
    148   if ((GetPixelChannelOffset(image,GreenPixelChannel) != 1) ||
    149       (GetPixelChannelOffset(image,BluePixelChannel) != 2))
    150     return(MagickFalse);
    151 
    152   if (image->number_channels == 3)
    153     return(MagickTrue);
    154 
    155   /* check if pixel order is RGBA */
    156   if (GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
    157     return(MagickFalse);
    158 
    159   return(MagickTrue);
    160 }
    161 
    162 static MagickBooleanType checkAccelerateConditionRGBA(const Image* image)
    163 {
    164   if (checkAccelerateCondition(image) == MagickFalse)
    165     return(MagickFalse);
    166 
    167   /* the order will be RGBA if the image has 4 channels */
    168   if (image->number_channels != 4)
    169     return(MagickFalse);
    170 
    171   if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
    172       (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
    173       (GetPixelBlueTraits(image) == UndefinedPixelTrait) ||
    174       (GetPixelAlphaTraits(image) == UndefinedPixelTrait))
    175     return(MagickFalse);
    176 
    177   return(MagickTrue);
    178 }
    179 
    180 static MagickBooleanType checkPixelIntensity(const Image *image,
    181   const PixelIntensityMethod method)
    182 {
    183   /* EncodePixelGamma and DecodePixelGamma are not supported */
    184   if ((method == Rec601LumaPixelIntensityMethod) ||
    185       (method == Rec709LumaPixelIntensityMethod))
    186     {
    187       if (image->colorspace == RGBColorspace)
    188         return(MagickFalse);
    189     }
    190 
    191   if ((method == Rec601LuminancePixelIntensityMethod) ||
    192       (method == Rec709LuminancePixelIntensityMethod))
    193     {
    194       if (image->colorspace == sRGBColorspace)
    195         return(MagickFalse);
    196     }
    197 
    198   return(MagickTrue);
    199 }
    200 
    201 static MagickBooleanType checkHistogramCondition(const Image *image,
    202   const PixelIntensityMethod method)
    203 {
    204   /* ensure this is the only pass get in for now. */
    205   if ((image->channel_mask & SyncChannels) == 0)
    206     return MagickFalse;
    207 
    208   return(checkPixelIntensity(image,method));
    209 }
    210 
    211 static MagickCLEnv getOpenCLEnvironment(ExceptionInfo* exception)
    212 {
    213   MagickCLEnv
    214     clEnv;
    215 
    216   clEnv=GetCurrentOpenCLEnv();
    217   if (clEnv == (MagickCLEnv) NULL)
    218     return((MagickCLEnv) NULL);
    219 
    220   if (clEnv->enabled == MagickFalse)
    221     return((MagickCLEnv) NULL);
    222 
    223   if (InitializeOpenCL(clEnv,exception) == MagickFalse)
    224     return((MagickCLEnv) NULL);
    225 
    226   return(clEnv);
    227 }
    228 
    229 static Image *cloneImage(const Image* image,ExceptionInfo *exception)
    230 {
    231   Image
    232     *clone;
    233 
    234   if (((image->channel_mask & RedChannel) != 0) &&
    235       ((image->channel_mask & GreenChannel) != 0) &&
    236       ((image->channel_mask & BlueChannel) != 0) &&
    237       ((image->channel_mask & AlphaChannel) != 0))
    238     clone=CloneImage(image,image->columns,image->rows,MagickTrue,exception);
    239   else
    240     {
    241       clone=CloneImage(image,0,0,MagickTrue,exception);
    242       if (clone != (Image *) NULL)
    243         SyncImagePixelCache(clone,exception);
    244     }
    245   return(clone);
    246 }
    247 
    248 /* pad the global workgroup size to the next multiple of
    249    the local workgroup size */
    250 inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
    251   const unsigned int orgGlobalSize,const unsigned int localGroupSize)
    252 {
    253   return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
    254 }
    255 
    256 static cl_mem createKernelInfo(MagickCLDevice device,const double radius,
    257   const double sigma,cl_uint *width,ExceptionInfo *exception)
    258 {
    259   char
    260     geometry[MagickPathExtent];
    261 
    262   cl_int
    263     status;
    264 
    265   cl_mem
    266     imageKernelBuffer;
    267 
    268   float
    269     *kernelBufferPtr;
    270 
    271   KernelInfo
    272     *kernel;
    273 
    274   ssize_t
    275     i;
    276 
    277   (void) FormatLocaleString(geometry,MagickPathExtent,
    278     "blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
    279   kernel=AcquireKernelInfo(geometry,exception);
    280   if (kernel == (KernelInfo *) NULL)
    281   {
    282     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    283       ResourceLimitWarning,"AcquireKernelInfo failed.",".");
    284     return((cl_mem) NULL);
    285   }
    286   kernelBufferPtr=(float *)AcquireMagickMemory(kernel->width*
    287     sizeof(*kernelBufferPtr));
    288   for (i = 0; i < (ssize_t) kernel->width; i++)
    289     kernelBufferPtr[i] = (float)kernel->values[i];
    290   imageKernelBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
    291     CL_MEM_READ_ONLY,kernel->width*sizeof(*kernelBufferPtr),kernelBufferPtr);
    292   *width=kernel->width;
    293   kernelBufferPtr=(float *) RelinquishMagickMemory(kernelBufferPtr);
    294   kernel=DestroyKernelInfo(kernel);
    295   if (imageKernelBuffer == (cl_mem) NULL)
    296     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    297       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
    298   return(imageKernelBuffer);
    299 }
    300 
    301 static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
    302   MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,
    303   cl_mem histogramBuffer,Image *image,const ChannelType channel,
    304   ExceptionInfo *exception)
    305 {
    306   MagickBooleanType
    307     outputReady;
    308 
    309   cl_int
    310     clStatus;
    311 
    312   cl_kernel
    313     histogramKernel;
    314 
    315   cl_event
    316     event;
    317 
    318   cl_uint
    319     colorspace,
    320     method;
    321 
    322   register ssize_t
    323     i;
    324 
    325   size_t
    326     global_work_size[2];
    327 
    328   histogramKernel = NULL;
    329 
    330   outputReady = MagickFalse;
    331   colorspace = image->colorspace;
    332   method = image->intensity;
    333 
    334   /* get the OpenCL kernel */
    335   histogramKernel = AcquireOpenCLKernel(device,"Histogram");
    336   if (histogramKernel == NULL)
    337   {
    338     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
    339     goto cleanup;
    340   }
    341 
    342   /* set the kernel arguments */
    343   i = 0;
    344   clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
    345   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
    346   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&colorspace);
    347   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&method);
    348   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
    349   if (clStatus != CL_SUCCESS)
    350   {
    351     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
    352     goto cleanup;
    353   }
    354 
    355   /* launch the kernel */
    356   global_work_size[0] = image->columns;
    357   global_work_size[1] = image->rows;
    358 
    359   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
    360 
    361   if (clStatus != CL_SUCCESS)
    362   {
    363     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
    364     goto cleanup;
    365   }
    366   RecordProfileData(device,histogramKernel,event);
    367 
    368   outputReady = MagickTrue;
    369 
    370 cleanup:
    371 
    372   if (histogramKernel!=NULL)
    373     ReleaseOpenCLKernel(histogramKernel);
    374 
    375   return(outputReady);
    376 }
    377 
    378 /*
    379 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    380 %                                                                             %
    381 %                                                                             %
    382 %                                                                             %
    383 %     A c c e l e r a t e A d d N o i s e I m a g e                           %
    384 %                                                                             %
    385 %                                                                             %
    386 %                                                                             %
    387 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    388 */
    389 
    390 static Image *ComputeAddNoiseImage(const Image *image,MagickCLEnv clEnv,
    391   const NoiseType noise_type,ExceptionInfo *exception)
    392 {
    393   cl_command_queue
    394     queue;
    395 
    396   cl_float
    397     attenuate;
    398 
    399   cl_int
    400     status;
    401 
    402   cl_kernel
    403     addNoiseKernel;
    404 
    405   cl_mem
    406     filteredImageBuffer,
    407     imageBuffer;
    408 
    409   cl_uint
    410     bufferLength,
    411     inputPixelCount,
    412     number_channels,
    413     numRandomNumberPerPixel,
    414     pixelsPerWorkitem,
    415     seed0,
    416     seed1,
    417     workItemCount;
    418 
    419   const char
    420     *option;
    421 
    422   const unsigned long
    423     *s;
    424 
    425   MagickBooleanType
    426     outputReady;
    427 
    428   MagickCLDevice
    429     device;
    430 
    431   Image
    432     *filteredImage;
    433 
    434   RandomInfo
    435     *randomInfo;
    436 
    437   size_t
    438     gsize[1],
    439     i,
    440     lsize[1],
    441     numRandPerChannel;
    442 
    443   filteredImage=NULL;
    444   addNoiseKernel=NULL;
    445   outputReady=MagickFalse;
    446 
    447   device=RequestOpenCLDevice(clEnv);
    448   queue=AcquireOpenCLCommandQueue(device);
    449   if (queue == (cl_command_queue) NULL)
    450     goto cleanup;
    451   filteredImage=cloneImage(image,exception);
    452   if (filteredImage == (Image *) NULL)
    453     goto cleanup;
    454   if (filteredImage->number_channels != image->number_channels)
    455     goto cleanup;
    456   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
    457   if (imageBuffer == (cl_mem) NULL)
    458     goto cleanup;
    459   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
    460   if (filteredImageBuffer == (cl_mem) NULL)
    461     goto cleanup;
    462 
    463   /* find out how many random numbers needed by pixel */
    464   numRandPerChannel=0;
    465   numRandomNumberPerPixel=0;
    466   switch (noise_type)
    467   {
    468     case UniformNoise:
    469     case ImpulseNoise:
    470     case LaplacianNoise:
    471     case RandomNoise:
    472     default:
    473       numRandPerChannel=1;
    474       break;
    475     case GaussianNoise:
    476     case MultiplicativeGaussianNoise:
    477     case PoissonNoise:
    478       numRandPerChannel=2;
    479       break;
    480   };
    481   if (GetPixelRedTraits(image) != UndefinedPixelTrait)
    482     numRandomNumberPerPixel+=numRandPerChannel;
    483   if (GetPixelGreenTraits(image) != UndefinedPixelTrait)
    484     numRandomNumberPerPixel+=numRandPerChannel;
    485   if (GetPixelBlueTraits(image) != UndefinedPixelTrait)
    486     numRandomNumberPerPixel+=numRandPerChannel;
    487   if (GetPixelAlphaTraits(image) != UndefinedPixelTrait)
    488     numRandomNumberPerPixel+=numRandPerChannel;
    489 
    490   addNoiseKernel=AcquireOpenCLKernel(device,"AddNoise");
    491   if (addNoiseKernel == (cl_kernel) NULL)
    492   {
    493     (void)OpenCLThrowMagickException(device,exception,GetMagickModule(),
    494       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
    495     goto cleanup;
    496   }
    497 
    498   /* 256 work items per group, 2 groups per CU */
    499   workItemCount=device->max_compute_units*2*256;
    500   inputPixelCount=(cl_int) (image->columns*image->rows);
    501   pixelsPerWorkitem=(inputPixelCount+workItemCount-1)/workItemCount;
    502   pixelsPerWorkitem=((pixelsPerWorkitem+3)/4)*4;
    503   lsize[0]=256;
    504   gsize[0]=workItemCount;
    505 
    506   randomInfo=AcquireRandomInfo();
    507   s=GetRandomInfoSeed(randomInfo);
    508   seed0=s[0];
    509   (void) GetPseudoRandomValue(randomInfo);
    510   seed1=s[0];
    511   randomInfo=DestroyRandomInfo(randomInfo);
    512 
    513   number_channels=(cl_uint) image->number_channels;
    514   bufferLength=(cl_uint) (image->columns*image->rows*image->number_channels);
    515   attenuate=1.0f;
    516   option=GetImageArtifact(image,"attenuate");
    517   if (option != (char *) NULL)
    518     attenuate=(float)StringToDouble(option,(char **) NULL);
    519 
    520   i=0;
    521   status =SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
    522   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
    523   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
    524   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&bufferLength);
    525   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
    526   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(NoiseType),(void *)&noise_type);
    527   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_float),(void *)&attenuate);
    528   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed0);
    529   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed1);
    530   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&numRandomNumberPerPixel);
    531   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
    532   if (status != CL_SUCCESS)
    533   {
    534     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    535       ResourceLimitWarning,"clSetKernelArg failed.",".");
    536     goto cleanup;
    537   }
    538 
    539   outputReady=EnqueueOpenCLKernel(queue,addNoiseKernel,1,(const size_t *) NULL,gsize,
    540     lsize,image,filteredImage,exception);
    541 
    542 cleanup:
    543 
    544   if (addNoiseKernel != (cl_kernel) NULL)
    545     ReleaseOpenCLKernel(addNoiseKernel);
    546   if (queue != (cl_command_queue) NULL)
    547     ReleaseOpenCLCommandQueue(device,queue);
    548   if (device != (MagickCLDevice) NULL)
    549     ReleaseOpenCLDevice(device);
    550   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
    551     filteredImage=DestroyImage(filteredImage);
    552 
    553   return(filteredImage);
    554 }
    555 
    556 MagickPrivate Image *AccelerateAddNoiseImage(const Image *image,
    557   const NoiseType noise_type,ExceptionInfo *exception)
    558 {
    559   Image
    560     *filteredImage;
    561 
    562   MagickCLEnv
    563     clEnv;
    564 
    565   assert(image != NULL);
    566   assert(exception != (ExceptionInfo *) NULL);
    567 
    568   if (checkAccelerateCondition(image) == MagickFalse)
    569     return((Image *) NULL);
    570 
    571   clEnv=getOpenCLEnvironment(exception);
    572   if (clEnv == (MagickCLEnv) NULL)
    573     return((Image *) NULL);
    574 
    575   filteredImage=ComputeAddNoiseImage(image,clEnv,noise_type,exception);
    576   return(filteredImage);
    577 }
    578 
    579 /*
    580 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    581 %                                                                             %
    582 %                                                                             %
    583 %                                                                             %
    584 %     A c c e l e r a t e B l u r I m a g e                                   %
    585 %                                                                             %
    586 %                                                                             %
    587 %                                                                             %
    588 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    589 */
    590 
    591 static Image *ComputeBlurImage(const Image* image,MagickCLEnv clEnv,
    592   const double radius,const double sigma,ExceptionInfo *exception)
    593 {
    594   cl_command_queue
    595     queue;
    596 
    597   cl_int
    598     status;
    599 
    600   cl_kernel
    601     blurColumnKernel,
    602     blurRowKernel;
    603 
    604   cl_mem
    605     filteredImageBuffer,
    606     imageBuffer,
    607     imageKernelBuffer,
    608     tempImageBuffer;
    609 
    610   cl_uint
    611     imageColumns,
    612     imageRows,
    613     kernelWidth,
    614     number_channels;
    615 
    616   Image
    617     *filteredImage;
    618 
    619   MagickBooleanType
    620     outputReady;
    621 
    622   MagickCLDevice
    623     device;
    624 
    625   MagickSizeType
    626     length;
    627 
    628   size_t
    629     chunkSize=256,
    630     gsize[2],
    631     i,
    632     lsize[2];
    633 
    634   filteredImage=NULL;
    635   tempImageBuffer=NULL;
    636   imageKernelBuffer=NULL;
    637   blurRowKernel=NULL;
    638   blurColumnKernel=NULL;
    639   outputReady=MagickFalse;
    640 
    641   device=RequestOpenCLDevice(clEnv);
    642   queue=AcquireOpenCLCommandQueue(device);
    643   filteredImage=cloneImage(image,exception);
    644   if (filteredImage == (Image *) NULL)
    645     goto cleanup;
    646   if (filteredImage->number_channels != image->number_channels)
    647     goto cleanup;
    648   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
    649   if (imageBuffer == (cl_mem) NULL)
    650     goto cleanup;
    651   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
    652   if (filteredImageBuffer == (cl_mem) NULL)
    653     goto cleanup;
    654 
    655   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
    656     exception);
    657   if (imageKernelBuffer == (cl_mem) NULL)
    658     goto cleanup;
    659 
    660   length=image->columns*image->rows;
    661   tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
    662     sizeof(cl_float4),(void *) NULL);
    663   if (tempImageBuffer == (cl_mem) NULL)
    664     goto cleanup;
    665 
    666   blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
    667   if (blurRowKernel == (cl_kernel) NULL)
    668   {
    669     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    670       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
    671     goto cleanup;
    672   }
    673 
    674   number_channels=(cl_uint) image->number_channels;
    675   imageColumns=(cl_uint) image->columns;
    676   imageRows=(cl_uint) image->rows;
    677 
    678   i=0;
    679   status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
    680   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
    681   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
    682   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
    683   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
    684   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
    685   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
    686   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
    687   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
    688   if (status != CL_SUCCESS)
    689   {
    690     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    691       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
    692     goto cleanup;
    693   }
    694 
    695   gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
    696   gsize[1]=image->rows;
    697   lsize[0]=chunkSize;
    698   lsize[1]=1;
    699 
    700   outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,(size_t *) NULL,gsize,
    701     lsize,image,filteredImage,exception);
    702   if (outputReady == MagickFalse)
    703     goto cleanup;
    704 
    705   blurColumnKernel=AcquireOpenCLKernel(device,"BlurColumn");
    706   if (blurColumnKernel == (cl_kernel) NULL)
    707   {
    708     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    709       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
    710     goto cleanup;
    711   }
    712 
    713   i=0;
    714   status =SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
    715   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),&number_channels);
    716   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
    717   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
    718   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
    719   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
    720   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
    721   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
    722   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
    723   if (status != CL_SUCCESS)
    724   {
    725     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    726       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
    727     goto cleanup;
    728   }
    729 
    730   gsize[0]=image->columns;
    731   gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
    732   lsize[0]=1;
    733   lsize[1]=chunkSize;
    734 
    735   outputReady=EnqueueOpenCLKernel(queue,blurColumnKernel,2,(size_t *) NULL,gsize,
    736     lsize,image,filteredImage,exception);
    737 
    738 cleanup:
    739 
    740   if (tempImageBuffer != (cl_mem) NULL)
    741     ReleaseOpenCLMemObject(tempImageBuffer);
    742   if (imageKernelBuffer != (cl_mem) NULL)
    743     ReleaseOpenCLMemObject(imageKernelBuffer);
    744   if (blurRowKernel != (cl_kernel) NULL)
    745     ReleaseOpenCLKernel(blurRowKernel);
    746   if (blurColumnKernel != (cl_kernel) NULL)
    747     ReleaseOpenCLKernel(blurColumnKernel);
    748   if (queue != (cl_command_queue) NULL)
    749     ReleaseOpenCLCommandQueue(device,queue);
    750   if (device != (MagickCLDevice) NULL)
    751     ReleaseOpenCLDevice(device);
    752   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
    753     filteredImage=DestroyImage(filteredImage);
    754 
    755   return(filteredImage);
    756 }
    757 
    758 MagickPrivate Image* AccelerateBlurImage(const Image *image,
    759   const double radius,const double sigma,ExceptionInfo *exception)
    760 {
    761   Image
    762     *filteredImage;
    763 
    764   MagickCLEnv
    765     clEnv;
    766 
    767   assert(image != NULL);
    768   assert(exception != (ExceptionInfo *) NULL);
    769 
    770   if (checkAccelerateCondition(image) == MagickFalse)
    771     return((Image *) NULL);
    772 
    773   clEnv=getOpenCLEnvironment(exception);
    774   if (clEnv == (MagickCLEnv) NULL)
    775     return((Image *) NULL);
    776 
    777   filteredImage=ComputeBlurImage(image,clEnv,radius,sigma,exception);
    778   return(filteredImage);
    779 }
    780 
    781 /*
    782 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    783 %                                                                             %
    784 %                                                                             %
    785 %                                                                             %
    786 %     A c c e l e r a t e C o n t r a s t I m a g e                           %
    787 %                                                                             %
    788 %                                                                             %
    789 %                                                                             %
    790 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    791 */
    792 
    793 static MagickBooleanType ComputeContrastImage(Image *image,MagickCLEnv clEnv,
    794   const MagickBooleanType sharpen,ExceptionInfo *exception)
    795 {
    796   CacheView
    797     *image_view;
    798 
    799   cl_command_queue
    800     queue;
    801 
    802   cl_int
    803     clStatus;
    804 
    805   cl_kernel
    806     filterKernel;
    807 
    808   cl_event
    809     event;
    810 
    811   cl_mem
    812     imageBuffer;
    813 
    814   cl_mem_flags
    815     mem_flags;
    816 
    817   MagickBooleanType
    818     outputReady;
    819 
    820   MagickCLDevice
    821     device;
    822 
    823   MagickSizeType
    824     length;
    825 
    826   size_t
    827     global_work_size[2];
    828 
    829   unsigned int
    830     i,
    831     uSharpen;
    832 
    833   void
    834     *inputPixels;
    835 
    836   outputReady = MagickFalse;
    837   inputPixels = NULL;
    838   imageBuffer = NULL;
    839   filterKernel = NULL;
    840   queue = NULL;
    841 
    842   device = RequestOpenCLDevice(clEnv);
    843 
    844   /* Create and initialize OpenCL buffers. */
    845   image_view=AcquireAuthenticCacheView(image,exception);
    846   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
    847   if (inputPixels == (void *) NULL)
    848   {
    849     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
    850     goto cleanup;
    851   }
    852 
    853   /* If the host pointer is aligned to the size of CLPixelPacket,
    854      then use the host buffer directly from the GPU; otherwise,
    855      create a buffer on the GPU and copy the data over */
    856   if (ALIGNED(inputPixels,CLPixelPacket))
    857   {
    858     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
    859   }
    860   else
    861   {
    862     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
    863   }
    864   /* create a CL buffer from image pixel buffer */
    865   length = image->columns * image->rows;
    866   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
    867   if (clStatus != CL_SUCCESS)
    868   {
    869     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
    870     goto cleanup;
    871   }
    872 
    873   filterKernel = AcquireOpenCLKernel(device,"Contrast");
    874   if (filterKernel == NULL)
    875   {
    876     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
    877     goto cleanup;
    878   }
    879 
    880   i = 0;
    881   clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
    882 
    883   uSharpen = (sharpen == MagickFalse)?0:1;
    884   clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
    885   if (clStatus != CL_SUCCESS)
    886   {
    887     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
    888     goto cleanup;
    889   }
    890 
    891   global_work_size[0] = image->columns;
    892   global_work_size[1] = image->rows;
    893   /* launch the kernel */
    894   queue = AcquireOpenCLCommandQueue(device);
    895   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
    896   if (clStatus != CL_SUCCESS)
    897   {
    898     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
    899     goto cleanup;
    900   }
    901   RecordProfileData(device,filterKernel,event);
    902 
    903   if (ALIGNED(inputPixels,CLPixelPacket))
    904   {
    905     length = image->columns * image->rows;
    906     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
    907   }
    908   else
    909   {
    910     length = image->columns * image->rows;
    911     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
    912   }
    913   if (clStatus != CL_SUCCESS)
    914   {
    915     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
    916     goto cleanup;
    917   }
    918   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
    919 
    920 cleanup:
    921 
    922   image_view=DestroyCacheView(image_view);
    923 
    924   if (imageBuffer!=NULL)
    925     clEnv->library->clReleaseMemObject(imageBuffer);
    926   if (filterKernel!=NULL)
    927     ReleaseOpenCLKernel(filterKernel);
    928   if (queue != NULL)
    929     ReleaseOpenCLCommandQueue(device,queue);
    930   if (device != NULL)
    931     ReleaseOpenCLDevice(device);
    932 
    933   return(outputReady);
    934 }
    935 
    936 MagickPrivate MagickBooleanType AccelerateContrastImage(Image *image,
    937   const MagickBooleanType sharpen,ExceptionInfo *exception)
    938 {
    939   MagickBooleanType
    940     status;
    941 
    942   MagickCLEnv
    943     clEnv;
    944 
    945   assert(image != NULL);
    946   assert(exception != (ExceptionInfo *) NULL);
    947 
    948   if (checkAccelerateConditionRGBA(image) == MagickFalse)
    949     return(MagickFalse);
    950 
    951   clEnv=getOpenCLEnvironment(exception);
    952   if (clEnv == (MagickCLEnv) NULL)
    953     return(MagickFalse);
    954 
    955   status=ComputeContrastImage(image,clEnv,sharpen,exception);
    956   return(status);
    957 }
    958 
    959 /*
    960 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    961 %                                                                             %
    962 %                                                                             %
    963 %                                                                             %
    964 %     A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e             %
    965 %                                                                             %
    966 %                                                                             %
    967 %                                                                             %
    968 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    969 */
    970 
    971 static MagickBooleanType ComputeContrastStretchImage(Image *image,
    972   MagickCLEnv clEnv,const double black_point,const double white_point,
    973   ExceptionInfo *exception)
    974 {
    975 #define ContrastStretchImageTag  "ContrastStretch/Image"
    976 #define MaxRange(color)  ((MagickRealType) ScaleQuantumToMap((Quantum) (color)))
    977 
    978   CacheView
    979     *image_view;
    980 
    981   cl_command_queue
    982     queue;
    983 
    984   cl_int
    985     clStatus;
    986 
    987   cl_mem_flags
    988     mem_flags;
    989 
    990   cl_mem
    991     histogramBuffer,
    992     imageBuffer,
    993     stretchMapBuffer;
    994 
    995   cl_kernel
    996     histogramKernel,
    997     stretchKernel;
    998 
    999   cl_event
   1000     event;
   1001 
   1002   cl_uint4
   1003     *histogram;
   1004 
   1005   double
   1006     intensity;
   1007 
   1008   FloatPixelPacket
   1009     black,
   1010     white;
   1011 
   1012   MagickBooleanType
   1013     outputReady,
   1014     status;
   1015 
   1016   MagickCLDevice
   1017     device;
   1018 
   1019   MagickSizeType
   1020     length;
   1021 
   1022   PixelPacket
   1023     *stretch_map;
   1024 
   1025   register ssize_t
   1026     i;
   1027 
   1028   size_t
   1029     global_work_size[2];
   1030 
   1031   void
   1032     *hostPtr,
   1033     *inputPixels;
   1034 
   1035   histogram=NULL;
   1036   stretch_map=NULL;
   1037   inputPixels = NULL;
   1038   imageBuffer = NULL;
   1039   histogramBuffer = NULL;
   1040   stretchMapBuffer = NULL;
   1041   histogramKernel = NULL;
   1042   stretchKernel = NULL;
   1043   queue = NULL;
   1044   outputReady = MagickFalse;
   1045 
   1046 
   1047   assert(image != (Image *) NULL);
   1048   assert(image->signature == MagickCoreSignature);
   1049   if (image->debug != MagickFalse)
   1050     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
   1051 
   1052   //exception=(&image->exception);
   1053 
   1054   /*
   1055    * initialize opencl env
   1056    */
   1057   device = RequestOpenCLDevice(clEnv);
   1058   queue = AcquireOpenCLCommandQueue(device);
   1059 
   1060   /*
   1061     Allocate and initialize histogram arrays.
   1062   */
   1063   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
   1064 
   1065   if (histogram == (cl_uint4 *) NULL)
   1066     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
   1067 
   1068   /* reset histogram */
   1069   (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
   1070 
   1071   /*
   1072   if (IsGrayImage(image,exception) != MagickFalse)
   1073     (void) SetImageColorspace(image,GRAYColorspace);
   1074   */
   1075 
   1076   status=MagickTrue;
   1077 
   1078 
   1079   /*
   1080     Form histogram.
   1081   */
   1082   /* Create and initialize OpenCL buffers. */
   1083   /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
   1084   /* assume this  will get a writable image */
   1085   image_view=AcquireAuthenticCacheView(image,exception);
   1086   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   1087 
   1088   if (inputPixels == (void *) NULL)
   1089   {
   1090     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
   1091     goto cleanup;
   1092   }
   1093   /* If the host pointer is aligned to the size of CLPixelPacket,
   1094      then use the host buffer directly from the GPU; otherwise,
   1095      create a buffer on the GPU and copy the data over */
   1096   if (ALIGNED(inputPixels,CLPixelPacket))
   1097   {
   1098     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   1099   }
   1100   else
   1101   {
   1102     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   1103   }
   1104   /* create a CL buffer from image pixel buffer */
   1105   length = image->columns * image->rows;
   1106   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   1107   if (clStatus != CL_SUCCESS)
   1108   {
   1109     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1110     goto cleanup;
   1111   }
   1112 
   1113   /* If the host pointer is aligned to the size of cl_uint,
   1114      then use the host buffer directly from the GPU; otherwise,
   1115      create a buffer on the GPU and copy the data over */
   1116   if (ALIGNED(histogram,cl_uint4))
   1117   {
   1118     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
   1119     hostPtr = histogram;
   1120   }
   1121   else
   1122   {
   1123     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   1124     hostPtr = histogram;
   1125   }
   1126   /* create a CL buffer for histogram  */
   1127   length = (MaxMap+1);
   1128   histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
   1129   if (clStatus != CL_SUCCESS)
   1130   {
   1131     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1132     goto cleanup;
   1133   }
   1134 
   1135   status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask,exception);
   1136   if (status == MagickFalse)
   1137     goto cleanup;
   1138 
   1139   /* read from the kenel output */
   1140   if (ALIGNED(histogram,cl_uint4))
   1141   {
   1142     length = (MaxMap+1);
   1143     clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
   1144   }
   1145   else
   1146   {
   1147     length = (MaxMap+1);
   1148     clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
   1149   }
   1150   if (clStatus != CL_SUCCESS)
   1151   {
   1152     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   1153     goto cleanup;
   1154   }
   1155 
   1156   /* unmap, don't block gpu to use this buffer again.  */
   1157   if (ALIGNED(histogram,cl_uint4))
   1158   {
   1159     clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
   1160     if (clStatus != CL_SUCCESS)
   1161     {
   1162       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
   1163       goto cleanup;
   1164     }
   1165   }
   1166 
   1167   /* recreate input buffer later, in case image updated */
   1168 #ifdef RECREATEBUFFER
   1169   if (imageBuffer!=NULL)
   1170     clEnv->library->clReleaseMemObject(imageBuffer);
   1171 #endif
   1172 
   1173   /* CPU stuff */
   1174   /*
   1175      Find the histogram boundaries by locating the black/white levels.
   1176   */
   1177   black.red=0.0;
   1178   white.red=MaxRange(QuantumRange);
   1179   if ((image->channel_mask & RedChannel) != 0)
   1180   {
   1181     intensity=0.0;
   1182     for (i=0; i <= (ssize_t) MaxMap; i++)
   1183     {
   1184       intensity+=histogram[i].s[2];
   1185       if (intensity > black_point)
   1186         break;
   1187     }
   1188     black.red=(MagickRealType) i;
   1189     intensity=0.0;
   1190     for (i=(ssize_t) MaxMap; i != 0; i--)
   1191     {
   1192       intensity+=histogram[i].s[2];
   1193       if (intensity > ((double) image->columns*image->rows-white_point))
   1194         break;
   1195     }
   1196     white.red=(MagickRealType) i;
   1197   }
   1198   black.green=0.0;
   1199   white.green=MaxRange(QuantumRange);
   1200   if ((image->channel_mask & GreenChannel) != 0)
   1201   {
   1202     intensity=0.0;
   1203     for (i=0; i <= (ssize_t) MaxMap; i++)
   1204     {
   1205       intensity+=histogram[i].s[2];
   1206       if (intensity > black_point)
   1207         break;
   1208     }
   1209     black.green=(MagickRealType) i;
   1210     intensity=0.0;
   1211     for (i=(ssize_t) MaxMap; i != 0; i--)
   1212     {
   1213       intensity+=histogram[i].s[2];
   1214       if (intensity > ((double) image->columns*image->rows-white_point))
   1215         break;
   1216     }
   1217     white.green=(MagickRealType) i;
   1218   }
   1219   black.blue=0.0;
   1220   white.blue=MaxRange(QuantumRange);
   1221   if ((image->channel_mask & BlueChannel) != 0)
   1222   {
   1223     intensity=0.0;
   1224     for (i=0; i <= (ssize_t) MaxMap; i++)
   1225     {
   1226       intensity+=histogram[i].s[2];
   1227       if (intensity > black_point)
   1228         break;
   1229     }
   1230     black.blue=(MagickRealType) i;
   1231     intensity=0.0;
   1232     for (i=(ssize_t) MaxMap; i != 0; i--)
   1233     {
   1234       intensity+=histogram[i].s[2];
   1235       if (intensity > ((double) image->columns*image->rows-white_point))
   1236         break;
   1237     }
   1238     white.blue=(MagickRealType) i;
   1239   }
   1240   black.alpha=0.0;
   1241   white.alpha=MaxRange(QuantumRange);
   1242   if ((image->channel_mask & AlphaChannel) != 0)
   1243   {
   1244     intensity=0.0;
   1245     for (i=0; i <= (ssize_t) MaxMap; i++)
   1246     {
   1247       intensity+=histogram[i].s[2];
   1248       if (intensity > black_point)
   1249         break;
   1250     }
   1251     black.alpha=(MagickRealType) i;
   1252     intensity=0.0;
   1253     for (i=(ssize_t) MaxMap; i != 0; i--)
   1254     {
   1255       intensity+=histogram[i].s[2];
   1256       if (intensity > ((double) image->columns*image->rows-white_point))
   1257         break;
   1258     }
   1259     white.alpha=(MagickRealType) i;
   1260   }
   1261   /*
   1262   black.index=0.0;
   1263   white.index=MaxRange(QuantumRange);
   1264   if (((channel & IndexChannel) != 0) && (image->colorspace == CMYKColorspace))
   1265   {
   1266     intensity=0.0;
   1267     for (i=0; i <= (ssize_t) MaxMap; i++)
   1268     {
   1269       intensity+=histogram[i].index;
   1270       if (intensity > black_point)
   1271         break;
   1272     }
   1273     black.index=(MagickRealType) i;
   1274     intensity=0.0;
   1275     for (i=(ssize_t) MaxMap; i != 0; i--)
   1276     {
   1277       intensity+=histogram[i].index;
   1278       if (intensity > ((double) image->columns*image->rows-white_point))
   1279         break;
   1280     }
   1281     white.index=(MagickRealType) i;
   1282   }
   1283   */
   1284 
   1285 
   1286   stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
   1287     sizeof(*stretch_map));
   1288 
   1289   if (stretch_map == (PixelPacket *) NULL)
   1290     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
   1291       image->filename);
   1292 
   1293   /*
   1294     Stretch the histogram to create the stretched image mapping.
   1295   */
   1296   (void) ResetMagickMemory(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
   1297   for (i=0; i <= (ssize_t) MaxMap; i++)
   1298   {
   1299     if ((image->channel_mask & RedChannel) != 0)
   1300     {
   1301       if (i < (ssize_t) black.red)
   1302         stretch_map[i].red=(Quantum) 0;
   1303       else
   1304         if (i > (ssize_t) white.red)
   1305           stretch_map[i].red=QuantumRange;
   1306         else
   1307           if (black.red != white.red)
   1308             stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
   1309                   (i-black.red)/(white.red-black.red)));
   1310     }
   1311     if ((image->channel_mask & GreenChannel) != 0)
   1312     {
   1313       if (i < (ssize_t) black.green)
   1314         stretch_map[i].green=0;
   1315       else
   1316         if (i > (ssize_t) white.green)
   1317           stretch_map[i].green=QuantumRange;
   1318         else
   1319           if (black.green != white.green)
   1320             stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
   1321                   (i-black.green)/(white.green-black.green)));
   1322     }
   1323     if ((image->channel_mask & BlueChannel) != 0)
   1324     {
   1325       if (i < (ssize_t) black.blue)
   1326         stretch_map[i].blue=0;
   1327       else
   1328         if (i > (ssize_t) white.blue)
   1329           stretch_map[i].blue= QuantumRange;
   1330         else
   1331           if (black.blue != white.blue)
   1332             stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
   1333                   (i-black.blue)/(white.blue-black.blue)));
   1334     }
   1335     if ((image->channel_mask & AlphaChannel) != 0)
   1336     {
   1337       if (i < (ssize_t) black.alpha)
   1338         stretch_map[i].alpha=0;
   1339       else
   1340         if (i > (ssize_t) white.alpha)
   1341           stretch_map[i].alpha=QuantumRange;
   1342         else
   1343           if (black.alpha != white.alpha)
   1344             stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
   1345                   (i-black.alpha)/(white.alpha-black.alpha)));
   1346     }
   1347     /*
   1348     if (((channel & IndexChannel) != 0) &&
   1349         (image->colorspace == CMYKColorspace))
   1350     {
   1351       if (i < (ssize_t) black.index)
   1352         stretch_map[i].index=0;
   1353       else
   1354         if (i > (ssize_t) white.index)
   1355           stretch_map[i].index=QuantumRange;
   1356         else
   1357           if (black.index != white.index)
   1358             stretch_map[i].index=ScaleMapToQuantum((MagickRealType) (MaxMap*
   1359                   (i-black.index)/(white.index-black.index)));
   1360     }
   1361     */
   1362   }
   1363 
   1364   /*
   1365     Stretch the image.
   1366   */
   1367   if (((image->channel_mask & AlphaChannel) != 0) || (((image->channel_mask & IndexChannel) != 0) &&
   1368       (image->colorspace == CMYKColorspace)))
   1369     image->storage_class=DirectClass;
   1370   if (image->storage_class == PseudoClass)
   1371   {
   1372     /*
   1373        Stretch colormap.
   1374        */
   1375     for (i=0; i < (ssize_t) image->colors; i++)
   1376     {
   1377       if ((image->channel_mask & RedChannel) != 0)
   1378       {
   1379         if (black.red != white.red)
   1380           image->colormap[i].red=stretch_map[
   1381             ScaleQuantumToMap(image->colormap[i].red)].red;
   1382       }
   1383       if ((image->channel_mask & GreenChannel) != 0)
   1384       {
   1385         if (black.green != white.green)
   1386           image->colormap[i].green=stretch_map[
   1387             ScaleQuantumToMap(image->colormap[i].green)].green;
   1388       }
   1389       if ((image->channel_mask & BlueChannel) != 0)
   1390       {
   1391         if (black.blue != white.blue)
   1392           image->colormap[i].blue=stretch_map[
   1393             ScaleQuantumToMap(image->colormap[i].blue)].blue;
   1394       }
   1395       if ((image->channel_mask & AlphaChannel) != 0)
   1396       {
   1397         if (black.alpha != white.alpha)
   1398           image->colormap[i].alpha=stretch_map[
   1399             ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
   1400       }
   1401     }
   1402   }
   1403 
   1404   /*
   1405     Stretch image.
   1406   */
   1407 
   1408 
   1409   /* GPU can work on this again, image and equalize map as input
   1410     image:        uchar4 (CLPixelPacket)
   1411     stretch_map:  uchar4 (PixelPacket)
   1412     black, white: float4 (FloatPixelPacket) */
   1413 
   1414 #ifdef RECREATEBUFFER
   1415   /* If the host pointer is aligned to the size of CLPixelPacket,
   1416      then use the host buffer directly from the GPU; otherwise,
   1417      create a buffer on the GPU and copy the data over */
   1418   if (ALIGNED(inputPixels,CLPixelPacket))
   1419   {
   1420     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
   1421   }
   1422   else
   1423   {
   1424     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   1425   }
   1426   /* create a CL buffer from image pixel buffer */
   1427   length = image->columns * image->rows;
   1428   imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   1429   if (clStatus != CL_SUCCESS)
   1430   {
   1431     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1432     goto cleanup;
   1433   }
   1434 #endif
   1435 
   1436   /* Create and initialize OpenCL buffers. */
   1437   if (ALIGNED(stretch_map, PixelPacket))
   1438   {
   1439     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   1440     hostPtr = stretch_map;
   1441   }
   1442   else
   1443   {
   1444     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   1445     hostPtr = stretch_map;
   1446   }
   1447   /* create a CL buffer for stretch_map  */
   1448   length = (MaxMap+1);
   1449   stretchMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
   1450   if (clStatus != CL_SUCCESS)
   1451   {
   1452     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1453     goto cleanup;
   1454   }
   1455 
   1456   /* get the OpenCL kernel */
   1457   stretchKernel = AcquireOpenCLKernel(device,"ContrastStretch");
   1458   if (stretchKernel == NULL)
   1459   {
   1460     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   1461     goto cleanup;
   1462   }
   1463 
   1464   /* set the kernel arguments */
   1465   i = 0;
   1466   clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   1467   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&image->channel_mask);
   1468   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
   1469   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&white);
   1470   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&black);
   1471   if (clStatus != CL_SUCCESS)
   1472   {
   1473     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   1474     goto cleanup;
   1475   }
   1476 
   1477   /* launch the kernel */
   1478   global_work_size[0] = image->columns;
   1479   global_work_size[1] = image->rows;
   1480 
   1481   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   1482 
   1483   if (clStatus != CL_SUCCESS)
   1484   {
   1485     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   1486     goto cleanup;
   1487   }
   1488   RecordProfileData(device,stretchKernel,event);
   1489 
   1490   /* read the data back */
   1491   if (ALIGNED(inputPixels,CLPixelPacket))
   1492   {
   1493     length = image->columns * image->rows;
   1494     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   1495   }
   1496   else
   1497   {
   1498     length = image->columns * image->rows;
   1499     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
   1500   }
   1501   if (clStatus != CL_SUCCESS)
   1502   {
   1503     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   1504     goto cleanup;
   1505   }
   1506 
   1507   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
   1508 
   1509 cleanup:
   1510 
   1511   image_view=DestroyCacheView(image_view);
   1512 
   1513   if (imageBuffer!=NULL)
   1514     clEnv->library->clReleaseMemObject(imageBuffer);
   1515 
   1516   if (stretchMapBuffer!=NULL)
   1517     clEnv->library->clReleaseMemObject(stretchMapBuffer);
   1518   if (stretch_map!=NULL)
   1519     stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
   1520   if (histogramBuffer!=NULL)
   1521     clEnv->library->clReleaseMemObject(histogramBuffer);
   1522   if (histogram!=NULL)
   1523     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
   1524   if (histogramKernel!=NULL)
   1525     ReleaseOpenCLKernel(histogramKernel);
   1526   if (stretchKernel!=NULL)
   1527     ReleaseOpenCLKernel(stretchKernel);
   1528   if (queue != NULL)
   1529     ReleaseOpenCLCommandQueue(device,queue);
   1530   if (device != NULL)
   1531     ReleaseOpenCLDevice(device);
   1532 
   1533   return(outputReady);
   1534 }
   1535 
   1536 MagickPrivate MagickBooleanType AccelerateContrastStretchImage(
   1537   Image *image,const double black_point,const double white_point,
   1538   ExceptionInfo *exception)
   1539 {
   1540   MagickBooleanType
   1541     status;
   1542 
   1543   MagickCLEnv
   1544     clEnv;
   1545 
   1546   assert(image != NULL);
   1547   assert(exception != (ExceptionInfo *) NULL);
   1548 
   1549   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
   1550       (checkHistogramCondition(image,image->intensity) == MagickFalse))
   1551     return(MagickFalse);
   1552 
   1553   clEnv=getOpenCLEnvironment(exception);
   1554   if (clEnv == (MagickCLEnv) NULL)
   1555     return(MagickFalse);
   1556 
   1557   status=ComputeContrastStretchImage(image,clEnv,black_point,white_point,
   1558     exception);
   1559   return(status);
   1560 }
   1561 
   1562 /*
   1563 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   1564 %                                                                             %
   1565 %                                                                             %
   1566 %                                                                             %
   1567 %     A c c e l e r a t e C o n v o l v e I m a g e                           %
   1568 %                                                                             %
   1569 %                                                                             %
   1570 %                                                                             %
   1571 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   1572 */
   1573 
   1574 static Image *ComputeConvolveImage(const Image* image,MagickCLEnv clEnv,
   1575   const KernelInfo *kernel,ExceptionInfo *exception)
   1576 {
   1577   CacheView
   1578     *filteredImage_view,
   1579     *image_view;
   1580 
   1581   cl_command_queue
   1582     queue;
   1583 
   1584   cl_event
   1585     event;
   1586 
   1587   cl_kernel
   1588     clkernel;
   1589 
   1590   cl_int
   1591     clStatus;
   1592 
   1593   cl_mem
   1594     convolutionKernel,
   1595     filteredImageBuffer,
   1596     imageBuffer;
   1597 
   1598   cl_mem_flags
   1599     mem_flags;
   1600 
   1601   const void
   1602     *inputPixels;
   1603 
   1604   float
   1605     *kernelBufferPtr;
   1606 
   1607   Image
   1608     *filteredImage;
   1609 
   1610   MagickBooleanType
   1611     outputReady;
   1612 
   1613   MagickCLDevice
   1614     device;
   1615 
   1616   MagickSizeType
   1617     length;
   1618 
   1619   size_t
   1620     global_work_size[3],
   1621     localGroupSize[3],
   1622     localMemoryRequirement;
   1623 
   1624   unsigned
   1625     kernelSize;
   1626 
   1627   unsigned int
   1628     filterHeight,
   1629     filterWidth,
   1630     i,
   1631     imageHeight,
   1632     imageWidth,
   1633     matte;
   1634 
   1635   void
   1636     *filteredPixels,
   1637     *hostPtr;
   1638 
   1639   /* intialize all CL objects to NULL */
   1640   imageBuffer = NULL;
   1641   filteredImageBuffer = NULL;
   1642   convolutionKernel = NULL;
   1643   clkernel = NULL;
   1644   queue = NULL;
   1645 
   1646   filteredImage = NULL;
   1647   filteredImage_view = NULL;
   1648   outputReady = MagickFalse;
   1649 
   1650   device = RequestOpenCLDevice(clEnv);
   1651 
   1652   image_view=AcquireAuthenticCacheView(image,exception);
   1653   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   1654   if (inputPixels == (const void *) NULL)
   1655   {
   1656     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
   1657     goto cleanup;
   1658   }
   1659 
   1660   /* Create and initialize OpenCL buffers. */
   1661 
   1662   /* If the host pointer is aligned to the size of CLPixelPacket,
   1663      then use the host buffer directly from the GPU; otherwise,
   1664      create a buffer on the GPU and copy the data over */
   1665   if (ALIGNED(inputPixels,CLPixelPacket))
   1666   {
   1667     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   1668   }
   1669   else
   1670   {
   1671     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   1672   }
   1673   /* create a CL buffer from image pixel buffer */
   1674   length = image->columns * image->rows;
   1675   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   1676   if (clStatus != CL_SUCCESS)
   1677   {
   1678     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1679     goto cleanup;
   1680   }
   1681 
   1682   filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
   1683   assert(filteredImage != NULL);
   1684   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
   1685   {
   1686     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
   1687     goto cleanup;
   1688   }
   1689   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
   1690   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
   1691   if (filteredPixels == (void *) NULL)
   1692   {
   1693     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
   1694     goto cleanup;
   1695   }
   1696 
   1697   if (ALIGNED(filteredPixels,CLPixelPacket))
   1698   {
   1699     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
   1700     hostPtr = filteredPixels;
   1701   }
   1702   else
   1703   {
   1704     mem_flags = CL_MEM_WRITE_ONLY;
   1705     hostPtr = NULL;
   1706   }
   1707   /* create a CL buffer from image pixel buffer */
   1708   length = image->columns * image->rows;
   1709   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   1710   if (clStatus != CL_SUCCESS)
   1711   {
   1712     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1713     goto cleanup;
   1714   }
   1715 
   1716   kernelSize = (unsigned int) (kernel->width * kernel->height);
   1717   convolutionKernel = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
   1718   if (clStatus != CL_SUCCESS)
   1719   {
   1720     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1721     goto cleanup;
   1722   }
   1723 
   1724   queue = AcquireOpenCLCommandQueue(device);
   1725 
   1726   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
   1727           , 0, NULL, NULL, &clStatus);
   1728   if (clStatus != CL_SUCCESS)
   1729   {
   1730     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
   1731     goto cleanup;
   1732   }
   1733   for (i = 0; i < kernelSize; i++)
   1734   {
   1735     kernelBufferPtr[i] = (float) kernel->values[i];
   1736   }
   1737   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
   1738   if (clStatus != CL_SUCCESS)
   1739   {
   1740     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
   1741     goto cleanup;
   1742   }
   1743 
   1744   /* Compute the local memory requirement for a 16x16 workgroup.
   1745      If it's larger than 16k, reduce the workgroup size to 8x8 */
   1746   localGroupSize[0] = 16;
   1747   localGroupSize[1] = 16;
   1748   localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
   1749     + kernel->width*kernel->height*sizeof(float);
   1750 
   1751   if (localMemoryRequirement > device->local_memory_size)
   1752   {
   1753     localGroupSize[0] = 8;
   1754     localGroupSize[1] = 8;
   1755     localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
   1756       + kernel->width*kernel->height*sizeof(float);
   1757   }
   1758   if (localMemoryRequirement <= device->local_memory_size)
   1759   {
   1760     /* get the OpenCL kernel */
   1761     clkernel = AcquireOpenCLKernel(device,"ConvolveOptimized");
   1762     if (clkernel == NULL)
   1763     {
   1764       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   1765       goto cleanup;
   1766     }
   1767 
   1768     /* set the kernel arguments */
   1769     i = 0;
   1770     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   1771     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   1772     imageWidth = (unsigned int) image->columns;
   1773     imageHeight = (unsigned int) image->rows;
   1774     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
   1775     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
   1776     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
   1777     filterWidth = (unsigned int) kernel->width;
   1778     filterHeight = (unsigned int) kernel->height;
   1779     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
   1780     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
   1781     matte = (image->alpha_trait > CopyPixelTrait)?1:0;
   1782     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
   1783     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
   1784     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
   1785     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
   1786     if (clStatus != CL_SUCCESS)
   1787     {
   1788       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   1789       goto cleanup;
   1790     }
   1791 
   1792     /* pad the global size to a multiple of the local work size dimension */
   1793     global_work_size[0] = ((image->columns + localGroupSize[0]  - 1)/localGroupSize[0] ) * localGroupSize[0] ;
   1794     global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
   1795 
   1796     /* launch the kernel */
   1797     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
   1798     if (clStatus != CL_SUCCESS)
   1799     {
   1800       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   1801       goto cleanup;
   1802     }
   1803     RecordProfileData(device,clkernel,event);
   1804   }
   1805   else
   1806   {
   1807     /* get the OpenCL kernel */
   1808     clkernel = AcquireOpenCLKernel(device,"Convolve");
   1809     if (clkernel == NULL)
   1810     {
   1811       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   1812       goto cleanup;
   1813     }
   1814 
   1815     /* set the kernel arguments */
   1816     i = 0;
   1817     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   1818     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   1819     imageWidth = (unsigned int) image->columns;
   1820     imageHeight = (unsigned int) image->rows;
   1821     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
   1822     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
   1823     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
   1824     filterWidth = (unsigned int) kernel->width;
   1825     filterHeight = (unsigned int) kernel->height;
   1826     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
   1827     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
   1828     matte = (image->alpha_trait > CopyPixelTrait)?1:0;
   1829     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
   1830     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
   1831     if (clStatus != CL_SUCCESS)
   1832     {
   1833       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   1834       goto cleanup;
   1835     }
   1836 
   1837     localGroupSize[0] = 8;
   1838     localGroupSize[1] = 8;
   1839     global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
   1840     global_work_size[1] = (image->rows    + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
   1841 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
   1842 
   1843     if (clStatus != CL_SUCCESS)
   1844     {
   1845       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   1846       goto cleanup;
   1847     }
   1848   }
   1849   RecordProfileData(device,clkernel,event);
   1850 
   1851   if (ALIGNED(filteredPixels,CLPixelPacket))
   1852   {
   1853     length = image->columns * image->rows;
   1854     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   1855   }
   1856   else
   1857   {
   1858     length = image->columns * image->rows;
   1859     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   1860   }
   1861   if (clStatus != CL_SUCCESS)
   1862   {
   1863     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   1864     goto cleanup;
   1865   }
   1866 
   1867   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
   1868 
   1869 cleanup:
   1870 
   1871   image_view=DestroyCacheView(image_view);
   1872   if (filteredImage_view != NULL)
   1873     filteredImage_view=DestroyCacheView(filteredImage_view);
   1874   if (imageBuffer != NULL)
   1875     clEnv->library->clReleaseMemObject(imageBuffer);
   1876   if (filteredImageBuffer != NULL)
   1877     clEnv->library->clReleaseMemObject(filteredImageBuffer);
   1878   if (convolutionKernel != NULL)
   1879     clEnv->library->clReleaseMemObject(convolutionKernel);
   1880   if (clkernel != NULL)
   1881     ReleaseOpenCLKernel(clkernel);
   1882   if (queue != NULL)
   1883     ReleaseOpenCLCommandQueue(device,queue);
   1884   if (device != NULL)
   1885     ReleaseOpenCLDevice(device);
   1886   if (outputReady == MagickFalse)
   1887   {
   1888     if (filteredImage != NULL)
   1889     {
   1890       DestroyImage(filteredImage);
   1891       filteredImage = NULL;
   1892     }
   1893   }
   1894 
   1895   return(filteredImage);
   1896 }
   1897 
   1898 MagickPrivate Image *AccelerateConvolveImage(const Image *image,
   1899   const KernelInfo *kernel,ExceptionInfo *exception)
   1900 {
   1901   /* Temporary disabled due to access violation
   1902 
   1903   Image
   1904     *filteredImage;
   1905 
   1906   assert(image != NULL);
   1907   assert(kernel != (KernelInfo *) NULL);
   1908   assert(exception != (ExceptionInfo *) NULL);
   1909   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
   1910       (checkOpenCLEnvironment(exception) == MagickFalse))
   1911     return((Image *) NULL);
   1912 
   1913   filteredImage=ComputeConvolveImage(image,kernel,exception);
   1914   return(filteredImage);
   1915   */
   1916   magick_unreferenced(image);
   1917   magick_unreferenced(kernel);
   1918   magick_unreferenced(exception);
   1919   return((Image *)NULL);
   1920 }
   1921 
   1922 /*
   1923 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   1924 %                                                                             %
   1925 %                                                                             %
   1926 %                                                                             %
   1927 %     A c c e l e r a t e D e s p e c k l e I m a g e                         %
   1928 %                                                                             %
   1929 %                                                                             %
   1930 %                                                                             %
   1931 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   1932 */
   1933 
   1934 static Image *ComputeDespeckleImage(const Image *image,MagickCLEnv clEnv,
   1935   ExceptionInfo*exception)
   1936 {
   1937   static const int
   1938     X[4] = {0, 1, 1,-1},
   1939     Y[4] = {1, 0, 1, 1};
   1940 
   1941   CacheView
   1942     *filteredImage_view,
   1943     *image_view;
   1944 
   1945   cl_command_queue
   1946     queue;
   1947 
   1948   cl_int
   1949     clStatus;
   1950 
   1951   cl_kernel
   1952     hullPass1,
   1953     hullPass2;
   1954 
   1955   cl_event
   1956     event;
   1957 
   1958   cl_mem_flags
   1959     mem_flags;
   1960 
   1961   cl_mem
   1962     filteredImageBuffer,
   1963     imageBuffer,
   1964     tempImageBuffer[2];
   1965 
   1966   const void
   1967     *inputPixels;
   1968 
   1969   Image
   1970     *filteredImage;
   1971 
   1972   int
   1973     k,
   1974     matte;
   1975 
   1976   MagickBooleanType
   1977     outputReady;
   1978 
   1979   MagickCLDevice
   1980     device;
   1981 
   1982   MagickSizeType
   1983     length;
   1984 
   1985   size_t
   1986     global_work_size[2];
   1987 
   1988   unsigned int
   1989     imageHeight,
   1990     imageWidth;
   1991 
   1992   void
   1993     *filteredPixels,
   1994     *hostPtr;
   1995 
   1996   outputReady = MagickFalse;
   1997   inputPixels = NULL;
   1998   filteredImage = NULL;
   1999   filteredImage_view = NULL;
   2000   filteredPixels = NULL;
   2001   imageBuffer = NULL;
   2002   filteredImageBuffer = NULL;
   2003   hullPass1 = NULL;
   2004   hullPass2 = NULL;
   2005   queue = NULL;
   2006   tempImageBuffer[0] = tempImageBuffer[1] = NULL;
   2007 
   2008   device = RequestOpenCLDevice(clEnv);
   2009   queue = AcquireOpenCLCommandQueue(device);
   2010 
   2011   image_view=AcquireAuthenticCacheView(image,exception);
   2012   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   2013   if (inputPixels == (void *) NULL)
   2014   {
   2015     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
   2016     goto cleanup;
   2017   }
   2018 
   2019   if (ALIGNED(inputPixels,CLPixelPacket))
   2020   {
   2021     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   2022   }
   2023   else
   2024   {
   2025     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   2026   }
   2027   /* create a CL buffer from image pixel buffer */
   2028   length = image->columns * image->rows;
   2029   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   2030   if (clStatus != CL_SUCCESS)
   2031   {
   2032     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   2033     goto cleanup;
   2034   }
   2035 
   2036   mem_flags = CL_MEM_READ_WRITE;
   2037   length = image->columns * image->rows;
   2038   for (k = 0; k < 2; k++)
   2039   {
   2040     tempImageBuffer[k] = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
   2041     if (clStatus != CL_SUCCESS)
   2042     {
   2043       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   2044       goto cleanup;
   2045     }
   2046   }
   2047 
   2048   filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
   2049   assert(filteredImage != NULL);
   2050   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
   2051   {
   2052     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
   2053     goto cleanup;
   2054   }
   2055   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
   2056   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
   2057   if (filteredPixels == (void *) NULL)
   2058   {
   2059     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
   2060     goto cleanup;
   2061   }
   2062 
   2063   if (ALIGNED(filteredPixels,CLPixelPacket))
   2064   {
   2065     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
   2066     hostPtr = filteredPixels;
   2067   }
   2068   else
   2069   {
   2070     mem_flags = CL_MEM_WRITE_ONLY;
   2071     hostPtr = NULL;
   2072   }
   2073   /* create a CL buffer from image pixel buffer */
   2074   length = image->columns * image->rows;
   2075   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   2076   if (clStatus != CL_SUCCESS)
   2077   {
   2078     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   2079     goto cleanup;
   2080   }
   2081 
   2082   hullPass1 = AcquireOpenCLKernel(device,"HullPass1");
   2083   hullPass2 = AcquireOpenCLKernel(device,"HullPass2");
   2084 
   2085   clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
   2086   clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
   2087   imageWidth = (unsigned int) image->columns;
   2088   clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
   2089   imageHeight = (unsigned int) image->rows;
   2090   clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
   2091   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
   2092   clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
   2093   if (clStatus != CL_SUCCESS)
   2094   {
   2095     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2096     goto cleanup;
   2097   }
   2098 
   2099   clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
   2100   clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
   2101   imageWidth = (unsigned int) image->columns;
   2102   clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
   2103   imageHeight = (unsigned int) image->rows;
   2104   clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
   2105   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
   2106   clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
   2107   if (clStatus != CL_SUCCESS)
   2108   {
   2109     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2110     goto cleanup;
   2111   }
   2112 
   2113 
   2114   global_work_size[0] = image->columns;
   2115   global_work_size[1] = image->rows;
   2116 
   2117 
   2118   for (k = 0; k < 4; k++)
   2119   {
   2120     cl_int2 offset;
   2121     int polarity;
   2122 
   2123 
   2124     offset.s[0] = X[k];
   2125     offset.s[1] = Y[k];
   2126     polarity = 1;
   2127     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
   2128     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
   2129     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
   2130     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
   2131     if (clStatus != CL_SUCCESS)
   2132     {
   2133       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2134       goto cleanup;
   2135     }
   2136     /* launch the kernel */
   2137 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2138     if (clStatus != CL_SUCCESS)
   2139     {
   2140       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2141       goto cleanup;
   2142     }
   2143     RecordProfileData(device,hullPass1,event);
   2144 
   2145     /* launch the kernel */
   2146 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2147     if (clStatus != CL_SUCCESS)
   2148     {
   2149       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2150       goto cleanup;
   2151     }
   2152     RecordProfileData(device,hullPass2,event);
   2153 
   2154     if (k == 0)
   2155       clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
   2156     offset.s[0] = -X[k];
   2157     offset.s[1] = -Y[k];
   2158     polarity = 1;
   2159     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
   2160     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
   2161     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
   2162     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
   2163     if (clStatus != CL_SUCCESS)
   2164     {
   2165       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2166       goto cleanup;
   2167     }
   2168     /* launch the kernel */
   2169 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2170     if (clStatus != CL_SUCCESS)
   2171     {
   2172       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2173       goto cleanup;
   2174     }
   2175     RecordProfileData(device,hullPass1,event);
   2176 
   2177     /* launch the kernel */
   2178 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2179     if (clStatus != CL_SUCCESS)
   2180     {
   2181       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2182       goto cleanup;
   2183     }
   2184     RecordProfileData(device,hullPass2,event);
   2185 
   2186     offset.s[0] = -X[k];
   2187     offset.s[1] = -Y[k];
   2188     polarity = -1;
   2189     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
   2190     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
   2191     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
   2192     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
   2193     if (clStatus != CL_SUCCESS)
   2194     {
   2195       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2196       goto cleanup;
   2197     }
   2198     /* launch the kernel */
   2199 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2200     if (clStatus != CL_SUCCESS)
   2201     {
   2202       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2203       goto cleanup;
   2204     }
   2205     RecordProfileData(device,hullPass1,event);
   2206 
   2207     /* launch the kernel */
   2208 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2209     if (clStatus != CL_SUCCESS)
   2210     {
   2211       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2212       goto cleanup;
   2213     }
   2214     RecordProfileData(device,hullPass2,event);
   2215 
   2216     offset.s[0] = X[k];
   2217     offset.s[1] = Y[k];
   2218     polarity = -1;
   2219     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
   2220     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
   2221     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
   2222     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
   2223 
   2224     if (k == 3)
   2225       clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
   2226 
   2227     if (clStatus != CL_SUCCESS)
   2228     {
   2229       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2230       goto cleanup;
   2231     }
   2232     /* launch the kernel */
   2233 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2234     if (clStatus != CL_SUCCESS)
   2235     {
   2236       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2237       goto cleanup;
   2238     }
   2239     RecordProfileData(device,hullPass1,event);
   2240 
   2241     /* launch the kernel */
   2242 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2243     if (clStatus != CL_SUCCESS)
   2244     {
   2245       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2246       goto cleanup;
   2247     }
   2248     RecordProfileData(device,hullPass2,event);
   2249   }
   2250 
   2251   if (ALIGNED(filteredPixels,CLPixelPacket))
   2252   {
   2253     length = image->columns * image->rows;
   2254     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   2255   }
   2256   else
   2257   {
   2258     length = image->columns * image->rows;
   2259     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   2260   }
   2261   if (clStatus != CL_SUCCESS)
   2262   {
   2263     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   2264     goto cleanup;
   2265   }
   2266 
   2267   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
   2268 
   2269 cleanup:
   2270 
   2271   image_view=DestroyCacheView(image_view);
   2272   if (filteredImage_view != NULL)
   2273     filteredImage_view=DestroyCacheView(filteredImage_view);
   2274 
   2275   if (queue != NULL)
   2276     ReleaseOpenCLCommandQueue(device,queue);
   2277   if (device != NULL)
   2278     ReleaseOpenCLDevice(device);
   2279   if (imageBuffer!=NULL)
   2280     clEnv->library->clReleaseMemObject(imageBuffer);
   2281   for (k = 0; k < 2; k++)
   2282   {
   2283     if (tempImageBuffer[k]!=NULL)
   2284       clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
   2285   }
   2286   if (filteredImageBuffer!=NULL)
   2287     clEnv->library->clReleaseMemObject(filteredImageBuffer);
   2288   if (hullPass1!=NULL)
   2289     ReleaseOpenCLKernel(hullPass1);
   2290   if (hullPass2!=NULL)
   2291     ReleaseOpenCLKernel(hullPass2);
   2292   if (outputReady == MagickFalse && filteredImage != NULL)
   2293     filteredImage=DestroyImage(filteredImage);
   2294 
   2295   return(filteredImage);
   2296 }
   2297 
   2298 MagickPrivate Image *AccelerateDespeckleImage(const Image* image,
   2299   ExceptionInfo* exception)
   2300 {
   2301   Image
   2302     *filteredImage;
   2303 
   2304   MagickCLEnv
   2305     clEnv;
   2306 
   2307   assert(image != NULL);
   2308   assert(exception != (ExceptionInfo *) NULL);
   2309 
   2310   if (checkAccelerateConditionRGBA(image) == MagickFalse)
   2311     return((Image *) NULL);
   2312 
   2313   clEnv=getOpenCLEnvironment(exception);
   2314   if (clEnv == (MagickCLEnv) NULL)
   2315     return((Image *) NULL);
   2316 
   2317   filteredImage=ComputeDespeckleImage(image,clEnv,exception);
   2318   return(filteredImage);
   2319 }
   2320 
   2321 /*
   2322 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2323 %                                                                             %
   2324 %                                                                             %
   2325 %                                                                             %
   2326 %     A c c e l e r a t e E q u a l i z e I m a g e                           %
   2327 %                                                                             %
   2328 %                                                                             %
   2329 %                                                                             %
   2330 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2331 */
   2332 
   2333 static MagickBooleanType ComputeEqualizeImage(Image *image,MagickCLEnv clEnv,
   2334   ExceptionInfo *exception)
   2335 {
   2336 #define EqualizeImageTag  "Equalize/Image"
   2337 
   2338   CacheView
   2339     *image_view;
   2340 
   2341   cl_command_queue
   2342     queue;
   2343 
   2344   cl_int
   2345     clStatus;
   2346 
   2347   cl_mem_flags
   2348     mem_flags;
   2349 
   2350   cl_mem
   2351     equalizeMapBuffer,
   2352     histogramBuffer,
   2353     imageBuffer;
   2354 
   2355   cl_kernel
   2356     equalizeKernel,
   2357     histogramKernel;
   2358 
   2359   cl_event
   2360     event;
   2361 
   2362   cl_uint4
   2363     *histogram;
   2364 
   2365   FloatPixelPacket
   2366     white,
   2367     black,
   2368     intensity,
   2369     *map;
   2370 
   2371   MagickBooleanType
   2372     outputReady,
   2373     status;
   2374 
   2375   MagickCLDevice
   2376     device;
   2377 
   2378   MagickSizeType
   2379     length;
   2380 
   2381   PixelPacket
   2382     *equalize_map;
   2383 
   2384   register ssize_t
   2385     i;
   2386 
   2387   size_t
   2388     global_work_size[2];
   2389 
   2390   void
   2391     *hostPtr,
   2392     *inputPixels;
   2393 
   2394   map=NULL;
   2395   histogram=NULL;
   2396   equalize_map=NULL;
   2397   inputPixels = NULL;
   2398   imageBuffer = NULL;
   2399   histogramBuffer = NULL;
   2400   equalizeMapBuffer = NULL;
   2401   histogramKernel = NULL;
   2402   equalizeKernel = NULL;
   2403   queue = NULL;
   2404   outputReady = MagickFalse;
   2405 
   2406   assert(image != (Image *) NULL);
   2407   assert(image->signature == MagickCoreSignature);
   2408   if (image->debug != MagickFalse)
   2409     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
   2410 
   2411   /*
   2412    * initialize opencl env
   2413    */
   2414   device = RequestOpenCLDevice(clEnv);
   2415   queue = AcquireOpenCLCommandQueue(device);
   2416 
   2417   /*
   2418     Allocate and initialize histogram arrays.
   2419   */
   2420   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
   2421   if (histogram == (cl_uint4 *) NULL)
   2422       ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
   2423 
   2424   /* reset histogram */
   2425   (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
   2426 
   2427   /* Create and initialize OpenCL buffers. */
   2428   /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
   2429   /* assume this  will get a writable image */
   2430   image_view=AcquireAuthenticCacheView(image,exception);
   2431   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   2432 
   2433   if (inputPixels == (void *) NULL)
   2434   {
   2435     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
   2436     goto cleanup;
   2437   }
   2438   /* If the host pointer is aligned to the size of CLPixelPacket,
   2439      then use the host buffer directly from the GPU; otherwise,
   2440      create a buffer on the GPU and copy the data over */
   2441   if (ALIGNED(inputPixels,CLPixelPacket))
   2442   {
   2443     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   2444   }
   2445   else
   2446   {
   2447     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   2448   }
   2449   /* create a CL buffer from image pixel buffer */
   2450   length = image->columns * image->rows;
   2451   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   2452   if (clStatus != CL_SUCCESS)
   2453   {
   2454     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   2455     goto cleanup;
   2456   }
   2457 
   2458   /* If the host pointer is aligned to the size of cl_uint,
   2459      then use the host buffer directly from the GPU; otherwise,
   2460      create a buffer on the GPU and copy the data over */
   2461   if (ALIGNED(histogram,cl_uint4))
   2462   {
   2463     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
   2464     hostPtr = histogram;
   2465   }
   2466   else
   2467   {
   2468     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   2469     hostPtr = histogram;
   2470   }
   2471   /* create a CL buffer for histogram  */
   2472   length = (MaxMap+1);
   2473   histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
   2474   if (clStatus != CL_SUCCESS)
   2475   {
   2476     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   2477     goto cleanup;
   2478   }
   2479 
   2480   status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask, exception);
   2481   if (status == MagickFalse)
   2482     goto cleanup;
   2483 
   2484   /* read from the kenel output */
   2485   if (ALIGNED(histogram,cl_uint4))
   2486   {
   2487     length = (MaxMap+1);
   2488     clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
   2489   }
   2490   else
   2491   {
   2492     length = (MaxMap+1);
   2493     clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
   2494   }
   2495   if (clStatus != CL_SUCCESS)
   2496   {
   2497     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   2498     goto cleanup;
   2499   }
   2500 
   2501   /* unmap, don't block gpu to use this buffer again.  */
   2502   if (ALIGNED(histogram,cl_uint4))
   2503   {
   2504     clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
   2505     if (clStatus != CL_SUCCESS)
   2506     {
   2507       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
   2508       goto cleanup;
   2509     }
   2510   }
   2511 
   2512   /* recreate input buffer later, in case image updated */
   2513 #ifdef RECREATEBUFFER
   2514   if (imageBuffer!=NULL)
   2515     clEnv->library->clReleaseMemObject(imageBuffer);
   2516 #endif
   2517 
   2518   /* CPU stuff */
   2519   equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
   2520   if (equalize_map == (PixelPacket *) NULL)
   2521     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
   2522 
   2523   map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
   2524   if (map == (FloatPixelPacket *) NULL)
   2525     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
   2526 
   2527   /*
   2528     Integrate the histogram to get the equalization map.
   2529   */
   2530   (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
   2531   for (i=0; i <= (ssize_t) MaxMap; i++)
   2532   {
   2533     if ((image->channel_mask & SyncChannels) != 0)
   2534     {
   2535       intensity.red+=histogram[i].s[2];
   2536       map[i]=intensity;
   2537       continue;
   2538     }
   2539     if ((image->channel_mask & RedChannel) != 0)
   2540       intensity.red+=histogram[i].s[2];
   2541     if ((image->channel_mask & GreenChannel) != 0)
   2542       intensity.green+=histogram[i].s[1];
   2543     if ((image->channel_mask & BlueChannel) != 0)
   2544       intensity.blue+=histogram[i].s[0];
   2545     if ((image->channel_mask & AlphaChannel) != 0)
   2546       intensity.alpha+=histogram[i].s[3];
   2547     /*
   2548     if (((channel & IndexChannel) != 0) &&
   2549         (image->colorspace == CMYKColorspace))
   2550     {
   2551       intensity.index+=histogram[i].index;
   2552     }
   2553     */
   2554     map[i]=intensity;
   2555   }
   2556   black=map[0];
   2557   white=map[(int) MaxMap];
   2558   (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
   2559   for (i=0; i <= (ssize_t) MaxMap; i++)
   2560   {
   2561     if ((image->channel_mask & SyncChannels) != 0)
   2562     {
   2563       if (white.red != black.red)
   2564         equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
   2565                 (map[i].red-black.red))/(white.red-black.red)));
   2566       continue;
   2567     }
   2568     if (((image->channel_mask & RedChannel) != 0) && (white.red != black.red))
   2569       equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
   2570               (map[i].red-black.red))/(white.red-black.red)));
   2571     if (((image->channel_mask & GreenChannel) != 0) && (white.green != black.green))
   2572       equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
   2573               (map[i].green-black.green))/(white.green-black.green)));
   2574     if (((image->channel_mask & BlueChannel) != 0) && (white.blue != black.blue))
   2575       equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
   2576               (map[i].blue-black.blue))/(white.blue-black.blue)));
   2577     if (((image->channel_mask & AlphaChannel) != 0) && (white.alpha != black.alpha))
   2578       equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
   2579               (map[i].alpha-black.alpha))/(white.alpha-black.alpha)));
   2580     /*
   2581     if ((((channel & IndexChannel) != 0) &&
   2582           (image->colorspace == CMYKColorspace)) &&
   2583         (white.index != black.index))
   2584       equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
   2585               (map[i].index-black.index))/(white.index-black.index)));
   2586     */
   2587   }
   2588 
   2589   if (image->storage_class == PseudoClass)
   2590   {
   2591     /*
   2592        Equalize colormap.
   2593        */
   2594     for (i=0; i < (ssize_t) image->colors; i++)
   2595     {
   2596       if ((image->channel_mask & SyncChannels) != 0)
   2597       {
   2598         if (white.red != black.red)
   2599         {
   2600           image->colormap[i].red=equalize_map[
   2601             ScaleQuantumToMap(image->colormap[i].red)].red;
   2602           image->colormap[i].green=equalize_map[
   2603             ScaleQuantumToMap(image->colormap[i].green)].red;
   2604           image->colormap[i].blue=equalize_map[
   2605             ScaleQuantumToMap(image->colormap[i].blue)].red;
   2606           image->colormap[i].alpha=equalize_map[
   2607             ScaleQuantumToMap(image->colormap[i].alpha)].red;
   2608         }
   2609         continue;
   2610       }
   2611       if (((image->channel_mask & RedChannel) != 0) && (white.red != black.red))
   2612         image->colormap[i].red=equalize_map[
   2613           ScaleQuantumToMap(image->colormap[i].red)].red;
   2614       if (((image->channel_mask & GreenChannel) != 0) && (white.green != black.green))
   2615         image->colormap[i].green=equalize_map[
   2616           ScaleQuantumToMap(image->colormap[i].green)].green;
   2617       if (((image->channel_mask & BlueChannel) != 0) && (white.blue != black.blue))
   2618         image->colormap[i].blue=equalize_map[
   2619           ScaleQuantumToMap(image->colormap[i].blue)].blue;
   2620       if (((image->channel_mask & AlphaChannel) != 0) &&
   2621           (white.alpha != black.alpha))
   2622         image->colormap[i].alpha=equalize_map[
   2623           ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
   2624     }
   2625   }
   2626 
   2627   /*
   2628     Equalize image.
   2629   */
   2630 
   2631   /* GPU can work on this again, image and equalize map as input
   2632     image:        uchar4 (CLPixelPacket)
   2633     equalize_map: uchar4 (PixelPacket)
   2634     black, white: float4 (FloatPixelPacket) */
   2635 
   2636 #ifdef RECREATEBUFFER
   2637   /* If the host pointer is aligned to the size of CLPixelPacket,
   2638      then use the host buffer directly from the GPU; otherwise,
   2639      create a buffer on the GPU and copy the data over */
   2640   if (ALIGNED(inputPixels,CLPixelPacket))
   2641   {
   2642     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
   2643   }
   2644   else
   2645   {
   2646     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   2647   }
   2648   /* create a CL buffer from image pixel buffer */
   2649   length = image->columns * image->rows;
   2650   imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   2651   if (clStatus != CL_SUCCESS)
   2652   {
   2653     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   2654     goto cleanup;
   2655   }
   2656 #endif
   2657 
   2658   /* Create and initialize OpenCL buffers. */
   2659   if (ALIGNED(equalize_map, PixelPacket))
   2660   {
   2661     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   2662     hostPtr = equalize_map;
   2663   }
   2664   else
   2665   {
   2666     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   2667     hostPtr = equalize_map;
   2668   }
   2669   /* create a CL buffer for eqaulize_map  */
   2670   length = (MaxMap+1);
   2671   equalizeMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
   2672   if (clStatus != CL_SUCCESS)
   2673   {
   2674     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   2675     goto cleanup;
   2676   }
   2677 
   2678   /* get the OpenCL kernel */
   2679   equalizeKernel = AcquireOpenCLKernel(device,"Equalize");
   2680   if (equalizeKernel == NULL)
   2681   {
   2682     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   2683     goto cleanup;
   2684   }
   2685 
   2686   /* set the kernel arguments */
   2687   i = 0;
   2688   clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   2689   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&image->channel_mask);
   2690   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
   2691   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
   2692   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
   2693   if (clStatus != CL_SUCCESS)
   2694   {
   2695     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2696     goto cleanup;
   2697   }
   2698 
   2699   /* launch the kernel */
   2700   global_work_size[0] = image->columns;
   2701   global_work_size[1] = image->rows;
   2702 
   2703   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2704 
   2705   if (clStatus != CL_SUCCESS)
   2706   {
   2707     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2708     goto cleanup;
   2709   }
   2710   RecordProfileData(device,equalizeKernel,event);
   2711 
   2712   /* read the data back */
   2713   if (ALIGNED(inputPixels,CLPixelPacket))
   2714   {
   2715     length = image->columns * image->rows;
   2716     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   2717   }
   2718   else
   2719   {
   2720     length = image->columns * image->rows;
   2721     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
   2722   }
   2723   if (clStatus != CL_SUCCESS)
   2724   {
   2725     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   2726     goto cleanup;
   2727   }
   2728 
   2729   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
   2730 
   2731 cleanup:
   2732 
   2733   image_view=DestroyCacheView(image_view);
   2734 
   2735   if (imageBuffer!=NULL)
   2736     clEnv->library->clReleaseMemObject(imageBuffer);
   2737   if (map!=NULL)
   2738     map=(FloatPixelPacket *) RelinquishMagickMemory(map);
   2739   if (equalizeMapBuffer!=NULL)
   2740     clEnv->library->clReleaseMemObject(equalizeMapBuffer);
   2741   if (equalize_map!=NULL)
   2742     equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
   2743   if (histogramBuffer!=NULL)
   2744     clEnv->library->clReleaseMemObject(histogramBuffer);
   2745   if (histogram!=NULL)
   2746     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
   2747   if (histogramKernel!=NULL)
   2748     ReleaseOpenCLKernel(histogramKernel);
   2749   if (equalizeKernel!=NULL)
   2750     ReleaseOpenCLKernel(equalizeKernel);
   2751   if (queue != NULL)
   2752     ReleaseOpenCLCommandQueue(device, queue);
   2753   if (device != NULL)
   2754     ReleaseOpenCLDevice(device);
   2755 
   2756   return(outputReady);
   2757 }
   2758 
   2759 MagickPrivate MagickBooleanType AccelerateEqualizeImage(Image *image,
   2760   ExceptionInfo *exception)
   2761 {
   2762   MagickBooleanType
   2763     status;
   2764 
   2765   MagickCLEnv
   2766     clEnv;
   2767 
   2768   assert(image != NULL);
   2769   assert(exception != (ExceptionInfo *) NULL);
   2770 
   2771   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
   2772       (checkHistogramCondition(image,image->intensity) == MagickFalse))
   2773     return(MagickFalse);
   2774 
   2775   clEnv=getOpenCLEnvironment(exception);
   2776   if (clEnv == (MagickCLEnv) NULL)
   2777     return(MagickFalse);
   2778 
   2779   status=ComputeEqualizeImage(image,clEnv,exception);
   2780   return(status);
   2781 }
   2782 
   2783 /*
   2784 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2785 %                                                                             %
   2786 %                                                                             %
   2787 %                                                                             %
   2788 %     A c c e l e r a t e F u n c t i o n I m a g e                           %
   2789 %                                                                             %
   2790 %                                                                             %
   2791 %                                                                             %
   2792 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2793 */
   2794 
   2795 static MagickBooleanType ComputeFunctionImage(Image *image,MagickCLEnv clEnv,
   2796   const MagickFunction function,const size_t number_parameters,
   2797   const double *parameters,ExceptionInfo *exception)
   2798 {
   2799   cl_command_queue
   2800     queue;
   2801 
   2802   cl_int
   2803     status;
   2804 
   2805   cl_kernel
   2806     functionKernel;
   2807 
   2808   cl_mem
   2809     imageBuffer,
   2810     parametersBuffer;
   2811 
   2812   cl_uint
   2813     number_params,
   2814     number_channels;
   2815 
   2816   float
   2817     *parametersBufferPtr;
   2818 
   2819   MagickBooleanType
   2820     outputReady;
   2821 
   2822   MagickCLDevice
   2823     device;
   2824 
   2825   size_t
   2826     gsize[2],
   2827     i;
   2828 
   2829   outputReady=MagickFalse;
   2830 
   2831   functionKernel=NULL;
   2832   parametersBuffer=NULL;
   2833 
   2834   device=RequestOpenCLDevice(clEnv);
   2835   queue=AcquireOpenCLCommandQueue(device);
   2836   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   2837   if (imageBuffer == (cl_mem) NULL)
   2838     goto cleanup;
   2839 
   2840   parametersBufferPtr=(float *) AcquireQuantumMemory(number_parameters,
   2841     sizeof(float));
   2842   if (parametersBufferPtr == (float *) NULL)
   2843     goto cleanup;
   2844   for (i=0; i<number_parameters; i++)
   2845     parametersBufferPtr[i]=(float) parameters[i];
   2846   parametersBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
   2847     CL_MEM_COPY_HOST_PTR,number_parameters*sizeof(*parametersBufferPtr),
   2848     parametersBufferPtr);
   2849   parametersBufferPtr=RelinquishMagickMemory(parametersBufferPtr);
   2850   if (parametersBuffer == (cl_mem) NULL)
   2851   {
   2852     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   2853       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
   2854     goto cleanup;
   2855   }
   2856 
   2857   functionKernel=AcquireOpenCLKernel(device,"ComputeFunction");
   2858   if (functionKernel == (cl_kernel) NULL)
   2859   {
   2860     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   2861       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   2862     goto cleanup;
   2863   }
   2864 
   2865   number_channels=(cl_uint) image->number_channels;
   2866   number_params=(cl_uint) number_parameters;
   2867 
   2868   i=0;
   2869   status =SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   2870   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_channels);
   2871   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
   2872   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(MagickFunction),(void *)&function);
   2873   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_params);
   2874   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
   2875   if (status != CL_SUCCESS)
   2876   {
   2877     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   2878       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
   2879     goto cleanup;
   2880   }
   2881 
   2882   gsize[0]=image->columns;
   2883   gsize[1]=image->rows;
   2884   outputReady=EnqueueOpenCLKernel(queue,functionKernel,2,(const size_t *) NULL,
   2885     gsize,(const size_t *) NULL,image,(const Image *) NULL,exception);
   2886 
   2887 cleanup:
   2888 
   2889   if (parametersBuffer != (cl_mem) NULL)
   2890     ReleaseOpenCLMemObject(parametersBuffer);
   2891   if (functionKernel != (cl_kernel) NULL)
   2892     ReleaseOpenCLKernel(functionKernel);
   2893   if (queue != (cl_command_queue) NULL)
   2894     ReleaseOpenCLCommandQueue(device,queue);
   2895   if (device != (MagickCLDevice) NULL)
   2896     ReleaseOpenCLDevice(device);
   2897   return(outputReady);
   2898 }
   2899 
   2900 MagickPrivate MagickBooleanType AccelerateFunctionImage(Image *image,
   2901   const MagickFunction function,const size_t number_parameters,
   2902   const double *parameters,ExceptionInfo *exception)
   2903 {
   2904   MagickBooleanType
   2905     status;
   2906 
   2907   MagickCLEnv
   2908     clEnv;
   2909 
   2910   assert(image != NULL);
   2911   assert(exception != (ExceptionInfo *) NULL);
   2912 
   2913   if (checkAccelerateCondition(image) == MagickFalse)
   2914     return(MagickFalse);
   2915 
   2916   clEnv=getOpenCLEnvironment(exception);
   2917   if (clEnv == (MagickCLEnv) NULL)
   2918     return(MagickFalse);
   2919 
   2920   status=ComputeFunctionImage(image,clEnv,function,number_parameters,
   2921     parameters,exception);
   2922   return(status);
   2923 }
   2924 
   2925 /*
   2926 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2927 %                                                                             %
   2928 %                                                                             %
   2929 %                                                                             %
   2930 %     A c c e l e r a t e G r a y s c a l e I m a g e                         %
   2931 %                                                                             %
   2932 %                                                                             %
   2933 %                                                                             %
   2934 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2935 */
   2936 
   2937 static MagickBooleanType ComputeGrayscaleImage(Image *image,MagickCLEnv clEnv,
   2938   const PixelIntensityMethod method,ExceptionInfo *exception)
   2939 {
   2940   cl_command_queue
   2941     queue;
   2942 
   2943   cl_int
   2944     status;
   2945 
   2946   cl_kernel
   2947     grayscaleKernel;
   2948 
   2949   cl_mem
   2950     imageBuffer;
   2951 
   2952   cl_uint
   2953     number_channels,
   2954     colorspace,
   2955     intensityMethod;
   2956 
   2957   MagickBooleanType
   2958     outputReady;
   2959 
   2960   MagickCLDevice
   2961     device;
   2962 
   2963   size_t
   2964     gsize[2],
   2965     i;
   2966 
   2967   outputReady=MagickFalse;
   2968   grayscaleKernel=NULL;
   2969 
   2970   assert(image != (Image *) NULL);
   2971   assert(image->signature == MagickCoreSignature);
   2972   device=RequestOpenCLDevice(clEnv);
   2973   queue=AcquireOpenCLCommandQueue(device);
   2974   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   2975   if (imageBuffer == (cl_mem) NULL)
   2976     goto cleanup;
   2977 
   2978   grayscaleKernel=AcquireOpenCLKernel(device,"Grayscale");
   2979   if (grayscaleKernel == (cl_kernel) NULL)
   2980   {
   2981     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   2982       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   2983     goto cleanup;
   2984   }
   2985 
   2986   number_channels=(cl_uint) image->number_channels;
   2987   intensityMethod=(cl_uint) method;
   2988   colorspace=(cl_uint) image->colorspace;
   2989 
   2990   i=0;
   2991   status =SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   2992   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&number_channels);
   2993   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&colorspace);
   2994   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&intensityMethod);
   2995   if (status != CL_SUCCESS)
   2996   {
   2997     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   2998       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
   2999     goto cleanup;
   3000   }
   3001 
   3002   gsize[0]=image->columns;
   3003   gsize[1]=image->rows;
   3004   outputReady=EnqueueOpenCLKernel(queue,grayscaleKernel,2,
   3005     (const size_t *) NULL,gsize,(const size_t *) NULL,image,(Image *) NULL,
   3006     exception);
   3007 
   3008 cleanup:
   3009 
   3010   if (grayscaleKernel != (cl_kernel) NULL)
   3011     ReleaseOpenCLKernel(grayscaleKernel);
   3012   if (queue != (cl_command_queue) NULL)
   3013     ReleaseOpenCLCommandQueue(device,queue);
   3014   if (device != (MagickCLDevice) NULL)
   3015     ReleaseOpenCLDevice(device);
   3016 
   3017   return(outputReady);
   3018 }
   3019 
   3020 MagickPrivate MagickBooleanType AccelerateGrayscaleImage(Image* image,
   3021   const PixelIntensityMethod method,ExceptionInfo *exception)
   3022 {
   3023   MagickBooleanType
   3024     status;
   3025 
   3026   MagickCLEnv
   3027     clEnv;
   3028 
   3029   assert(image != NULL);
   3030   assert(exception != (ExceptionInfo *) NULL);
   3031 
   3032   if ((checkAccelerateCondition(image) == MagickFalse) ||
   3033       (checkPixelIntensity(image,method) == MagickFalse))
   3034     return(MagickFalse);
   3035 
   3036   if (image->number_channels < 3)
   3037     return(MagickFalse);
   3038 
   3039   if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
   3040       (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
   3041       (GetPixelBlueTraits(image) == UndefinedPixelTrait))
   3042     return(MagickFalse);
   3043 
   3044   clEnv=getOpenCLEnvironment(exception);
   3045   if (clEnv == (MagickCLEnv) NULL)
   3046     return(MagickFalse);
   3047 
   3048   status=ComputeGrayscaleImage(image,clEnv,method,exception);
   3049   return(status);
   3050 }
   3051 
   3052 /*
   3053 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3054 %                                                                             %
   3055 %                                                                             %
   3056 %                                                                             %
   3057 %     A c c e l e r a t e L o c a l C o n t r a s t I m a g e                 %
   3058 %                                                                             %
   3059 %                                                                             %
   3060 %                                                                             %
   3061 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3062 */
   3063 
   3064 static Image *ComputeLocalContrastImage(const Image *image,MagickCLEnv clEnv,
   3065   const double radius,const double strength,ExceptionInfo *exception)
   3066 {
   3067   CacheView
   3068     *filteredImage_view,
   3069     *image_view;
   3070 
   3071   cl_command_queue
   3072     queue;
   3073 
   3074   cl_int
   3075     clStatus,
   3076     iRadius;
   3077 
   3078   cl_kernel
   3079     blurRowKernel,
   3080     blurColumnKernel;
   3081 
   3082   cl_event
   3083     event;
   3084 
   3085   cl_mem
   3086     filteredImageBuffer,
   3087     imageBuffer,
   3088     imageKernelBuffer,
   3089     tempImageBuffer;
   3090 
   3091   cl_mem_flags
   3092     mem_flags;
   3093 
   3094   const void
   3095     *inputPixels;
   3096 
   3097   Image
   3098     *filteredImage;
   3099 
   3100   MagickBooleanType
   3101     outputReady;
   3102 
   3103   MagickCLDevice
   3104     device;
   3105 
   3106   MagickSizeType
   3107     length;
   3108 
   3109   void
   3110     *filteredPixels,
   3111     *hostPtr;
   3112 
   3113   unsigned int
   3114     i,
   3115     imageColumns,
   3116     imageRows,
   3117     passes;
   3118 
   3119   filteredImage = NULL;
   3120   filteredImage_view = NULL;
   3121   imageBuffer = NULL;
   3122   filteredImageBuffer = NULL;
   3123   tempImageBuffer = NULL;
   3124   imageKernelBuffer = NULL;
   3125   blurRowKernel = NULL;
   3126   blurColumnKernel = NULL;
   3127   queue = NULL;
   3128   outputReady = MagickFalse;
   3129 
   3130   device = RequestOpenCLDevice(clEnv);
   3131   queue = AcquireOpenCLCommandQueue(device);
   3132 
   3133   /* Create and initialize OpenCL buffers. */
   3134   {
   3135     image_view=AcquireAuthenticCacheView(image,exception);
   3136     inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   3137     if (inputPixels == (const void *) NULL)
   3138     {
   3139       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
   3140       goto cleanup;
   3141     }
   3142 
   3143     /* If the host pointer is aligned to the size of CLPixelPacket,
   3144      then use the host buffer directly from the GPU; otherwise,
   3145      create a buffer on the GPU and copy the data over */
   3146     if (ALIGNED(inputPixels,CLPixelPacket))
   3147     {
   3148       mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   3149     }
   3150     else
   3151     {
   3152       mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   3153     }
   3154     /* create a CL buffer from image pixel buffer */
   3155     length = image->columns * image->rows;
   3156     imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   3157     if (clStatus != CL_SUCCESS)
   3158     {
   3159       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   3160       goto cleanup;
   3161     }
   3162   }
   3163 
   3164   /* create output */
   3165   {
   3166     filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
   3167     assert(filteredImage != NULL);
   3168     if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
   3169     {
   3170       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
   3171       goto cleanup;
   3172     }
   3173     filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
   3174     filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
   3175     if (filteredPixels == (void *) NULL)
   3176     {
   3177       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
   3178       goto cleanup;
   3179     }
   3180 
   3181     if (ALIGNED(filteredPixels,CLPixelPacket))
   3182     {
   3183       mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
   3184       hostPtr = filteredPixels;
   3185     }
   3186     else
   3187     {
   3188       mem_flags = CL_MEM_WRITE_ONLY;
   3189       hostPtr = NULL;
   3190     }
   3191 
   3192     /* create a CL buffer from image pixel buffer */
   3193     length = image->columns * image->rows;
   3194     filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   3195     if (clStatus != CL_SUCCESS)
   3196     {
   3197       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   3198       goto cleanup;
   3199     }
   3200   }
   3201 
   3202   {
   3203     /* create temp buffer */
   3204     {
   3205       length = image->columns * image->rows;
   3206       tempImageBuffer = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
   3207       if (clStatus != CL_SUCCESS)
   3208       {
   3209         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   3210         goto cleanup;
   3211       }
   3212     }
   3213 
   3214     /* get the opencl kernel */
   3215     {
   3216       blurRowKernel = AcquireOpenCLKernel(device,"LocalContrastBlurRow");
   3217       if (blurRowKernel == NULL)
   3218       {
   3219         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   3220         goto cleanup;
   3221       };
   3222 
   3223       blurColumnKernel = AcquireOpenCLKernel(device,"LocalContrastBlurApplyColumn");
   3224       if (blurColumnKernel == NULL)
   3225       {
   3226         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   3227         goto cleanup;
   3228       };
   3229     }
   3230 
   3231     {
   3232       imageColumns = (unsigned int) image->columns;
   3233       imageRows = (unsigned int) image->rows;
   3234       iRadius = (cl_int) (image->rows > image->columns ? image->rows : image->columns) * 0.002f * fabs(radius); // Normalized radius, 100% gives blur radius of 20% of the largest dimension
   3235 
   3236       passes = (((1.0f * imageColumns) * imageColumns * iRadius) + 3999999999) / 4000000000.0f;
   3237       passes = (passes < 1) ? 1: passes;
   3238 
   3239       /* set the kernel arguments */
   3240       i = 0;
   3241       clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   3242       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   3243       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
   3244       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
   3245       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
   3246       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
   3247 
   3248       if (clStatus != CL_SUCCESS)
   3249       {
   3250         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   3251         goto cleanup;
   3252       }
   3253     }
   3254 
   3255     /* launch the kernel */
   3256     {
   3257       int x;
   3258       for (x = 0; x < passes; ++x) {
   3259         size_t gsize[2];
   3260         size_t wsize[2];
   3261         size_t goffset[2];
   3262 
   3263         gsize[0] = 256;
   3264         gsize[1] = image->rows / passes;
   3265         wsize[0] = 256;
   3266         wsize[1] = 1;
   3267         goffset[0] = 0;
   3268         goffset[1] = x * gsize[1];
   3269 
   3270         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
   3271         if (clStatus != CL_SUCCESS)
   3272         {
   3273           (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   3274           goto cleanup;
   3275         }
   3276         clEnv->library->clFlush(queue);
   3277         RecordProfileData(device,blurRowKernel,event);
   3278       }
   3279     }
   3280 
   3281     {
   3282       cl_float FStrength = strength;
   3283       i = 0;
   3284       clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   3285       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   3286       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
   3287       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
   3288       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
   3289       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
   3290       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
   3291 
   3292       if (clStatus != CL_SUCCESS)
   3293       {
   3294         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   3295         goto cleanup;
   3296       }
   3297     }
   3298 
   3299     /* launch the kernel */
   3300     {
   3301       int x;
   3302       for (x = 0; x < passes; ++x) {
   3303         size_t gsize[2];
   3304         size_t wsize[2];
   3305         size_t goffset[2];
   3306 
   3307         gsize[0] = ((image->columns + 3) / 4) * 4;
   3308         gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
   3309         wsize[0] = 4;
   3310         wsize[1] = 64;
   3311         goffset[0] = 0;
   3312         goffset[1] = x * gsize[1];
   3313 
   3314         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
   3315         if (clStatus != CL_SUCCESS)
   3316         {
   3317           (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   3318           goto cleanup;
   3319         }
   3320         clEnv->library->clFlush(queue);
   3321         RecordProfileData(device,blurColumnKernel,event);
   3322       }
   3323     }
   3324   }
   3325 
   3326   /* get result */
   3327   if (ALIGNED(filteredPixels,CLPixelPacket))
   3328   {
   3329     length = image->columns * image->rows;
   3330     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   3331   }
   3332   else
   3333   {
   3334     length = image->columns * image->rows;
   3335     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   3336   }
   3337   if (clStatus != CL_SUCCESS)
   3338   {
   3339     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   3340     goto cleanup;
   3341   }
   3342 
   3343   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
   3344 
   3345 cleanup:
   3346 
   3347   image_view=DestroyCacheView(image_view);
   3348   if (filteredImage_view != NULL)
   3349     filteredImage_view=DestroyCacheView(filteredImage_view);
   3350 
   3351   if (imageBuffer!=NULL)
   3352     clEnv->library->clReleaseMemObject(imageBuffer);
   3353   if (filteredImageBuffer!=NULL)
   3354     clEnv->library->clReleaseMemObject(filteredImageBuffer);
   3355   if (tempImageBuffer!=NULL)
   3356     clEnv->library->clReleaseMemObject(tempImageBuffer);
   3357   if (imageKernelBuffer!=NULL)
   3358     clEnv->library->clReleaseMemObject(imageKernelBuffer);
   3359   if (blurRowKernel!=NULL)
   3360     ReleaseOpenCLKernel(blurRowKernel);
   3361   if (blurColumnKernel!=NULL)
   3362     ReleaseOpenCLKernel(blurColumnKernel);
   3363   if (queue != NULL)
   3364     ReleaseOpenCLCommandQueue(device, queue);
   3365   if (device != NULL)
   3366     ReleaseOpenCLDevice(device);
   3367   if (outputReady == MagickFalse)
   3368   {
   3369     if (filteredImage != NULL)
   3370     {
   3371       DestroyImage(filteredImage);
   3372       filteredImage = NULL;
   3373     }
   3374   }
   3375 
   3376   return(filteredImage);
   3377 }
   3378 
   3379 MagickPrivate Image *AccelerateLocalContrastImage(const Image *image,
   3380   const double radius,const double strength,ExceptionInfo *exception)
   3381 {
   3382   Image
   3383     *filteredImage;
   3384 
   3385   MagickCLEnv
   3386     clEnv;
   3387 
   3388   assert(image != NULL);
   3389   assert(exception != (ExceptionInfo *) NULL);
   3390 
   3391   if (checkAccelerateConditionRGBA(image) == MagickFalse)
   3392     return((Image *) NULL);
   3393 
   3394   clEnv=getOpenCLEnvironment(exception);
   3395   if (clEnv == (MagickCLEnv) NULL)
   3396     return((Image *) NULL);
   3397 
   3398   filteredImage=ComputeLocalContrastImage(image,clEnv,radius,strength,
   3399     exception);
   3400   return(filteredImage);
   3401 }
   3402 
   3403 /*
   3404 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3405 %                                                                             %
   3406 %                                                                             %
   3407 %                                                                             %
   3408 %     A c c e l e r a t e M o d u l a t e I m a g e                           %
   3409 %                                                                             %
   3410 %                                                                             %
   3411 %                                                                             %
   3412 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3413 */
   3414 
   3415 static MagickBooleanType ComputeModulateImage(Image *image,MagickCLEnv clEnv,
   3416   const double percent_brightness,const double percent_hue,
   3417   const double percent_saturation,const ColorspaceType colorspace,
   3418   ExceptionInfo *exception)
   3419 {
   3420   CacheView
   3421     *image_view;
   3422 
   3423   cl_float
   3424     bright,
   3425     hue,
   3426     saturation;
   3427 
   3428   cl_command_queue
   3429     queue;
   3430 
   3431   cl_int
   3432     color,
   3433     clStatus;
   3434 
   3435   cl_kernel
   3436     modulateKernel;
   3437 
   3438   cl_event
   3439     event;
   3440 
   3441   cl_mem
   3442     imageBuffer;
   3443 
   3444   cl_mem_flags
   3445     mem_flags;
   3446 
   3447   MagickBooleanType
   3448     outputReady;
   3449 
   3450   MagickCLDevice
   3451     device;
   3452 
   3453   MagickSizeType
   3454     length;
   3455 
   3456   register ssize_t
   3457     i;
   3458 
   3459   void
   3460     *inputPixels;
   3461 
   3462   inputPixels = NULL;
   3463   imageBuffer = NULL;
   3464   modulateKernel = NULL;
   3465 
   3466   assert(image != (Image *) NULL);
   3467   assert(image->signature == MagickCoreSignature);
   3468   if (image->debug != MagickFalse)
   3469     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
   3470 
   3471   /*
   3472    * initialize opencl env
   3473    */
   3474   device = RequestOpenCLDevice(clEnv);
   3475   queue = AcquireOpenCLCommandQueue(device);
   3476 
   3477   outputReady = MagickFalse;
   3478 
   3479   /* Create and initialize OpenCL buffers.
   3480    inputPixels = AcquirePixelCachePixels(image, &length, exception);
   3481    assume this  will get a writable image
   3482    */
   3483   image_view=AcquireAuthenticCacheView(image,exception);
   3484   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   3485   if (inputPixels == (void *) NULL)
   3486   {
   3487     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
   3488     goto cleanup;
   3489   }
   3490 
   3491   /* If the host pointer is aligned to the size of CLPixelPacket,
   3492    then use the host buffer directly from the GPU; otherwise,
   3493    create a buffer on the GPU and copy the data over
   3494    */
   3495   if (ALIGNED(inputPixels,CLPixelPacket))
   3496   {
   3497     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
   3498   }
   3499   else
   3500   {
   3501     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   3502   }
   3503   /* create a CL buffer from image pixel buffer */
   3504   length = image->columns * image->rows;
   3505   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   3506   if (clStatus != CL_SUCCESS)
   3507   {
   3508     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   3509     goto cleanup;
   3510   }
   3511 
   3512   modulateKernel = AcquireOpenCLKernel(device, "Modulate");
   3513   if (modulateKernel == NULL)
   3514   {
   3515     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   3516     goto cleanup;
   3517   }
   3518 
   3519   bright=percent_brightness;
   3520   hue=percent_hue;
   3521   saturation=percent_saturation;
   3522   color=colorspace;
   3523 
   3524   i = 0;
   3525   clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   3526   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
   3527   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
   3528   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
   3529   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
   3530   if (clStatus != CL_SUCCESS)
   3531   {
   3532     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   3533     goto cleanup;
   3534   }
   3535 
   3536   {
   3537     size_t global_work_size[2];
   3538     global_work_size[0] = image->columns;
   3539     global_work_size[1] = image->rows;
   3540     /* launch the kernel */
   3541 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   3542     if (clStatus != CL_SUCCESS)
   3543     {
   3544       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   3545       goto cleanup;
   3546     }
   3547     RecordProfileData(device,modulateKernel,event);
   3548   }
   3549 
   3550   if (ALIGNED(inputPixels,CLPixelPacket))
   3551   {
   3552     length = image->columns * image->rows;
   3553     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   3554   }
   3555   else
   3556   {
   3557     length = image->columns * image->rows;
   3558     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
   3559   }
   3560   if (clStatus != CL_SUCCESS)
   3561   {
   3562     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   3563     goto cleanup;
   3564   }
   3565 
   3566   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
   3567 
   3568 cleanup:
   3569 
   3570   image_view=DestroyCacheView(image_view);
   3571 
   3572   if (imageBuffer!=NULL)
   3573     clEnv->library->clReleaseMemObject(imageBuffer);
   3574   if (modulateKernel!=NULL)
   3575     ReleaseOpenCLKernel(modulateKernel);
   3576   if (queue != NULL)
   3577     ReleaseOpenCLCommandQueue(device,queue);
   3578   if (device != NULL)
   3579     ReleaseOpenCLDevice(device);
   3580 
   3581   return outputReady;
   3582 
   3583 }
   3584 
   3585 MagickPrivate MagickBooleanType AccelerateModulateImage(Image *image,
   3586   const double percent_brightness,const double percent_hue,
   3587   const double percent_saturation,const ColorspaceType colorspace,
   3588   ExceptionInfo *exception)
   3589 {
   3590   MagickBooleanType
   3591     status;
   3592 
   3593   MagickCLEnv
   3594     clEnv;
   3595 
   3596   assert(image != NULL);
   3597   assert(exception != (ExceptionInfo *) NULL);
   3598 
   3599   if (checkAccelerateConditionRGBA(image) == MagickFalse)
   3600     return(MagickFalse);
   3601 
   3602   if ((colorspace != HSLColorspace) && (colorspace != UndefinedColorspace))
   3603     return(MagickFalse);
   3604 
   3605   clEnv=getOpenCLEnvironment(exception);
   3606   if (clEnv == (MagickCLEnv) NULL)
   3607     return(MagickFalse);
   3608 
   3609   status=ComputeModulateImage(image,clEnv,percent_brightness,percent_hue,
   3610     percent_saturation,colorspace,exception);
   3611   return(status);
   3612 }
   3613 
   3614 /*
   3615 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3616 %                                                                             %
   3617 %                                                                             %
   3618 %                                                                             %
   3619 %     A c c e l e r a t e M o t i o n B l u r I m a g e                       %
   3620 %                                                                             %
   3621 %                                                                             %
   3622 %                                                                             %
   3623 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3624 */
   3625 
   3626 static Image* ComputeMotionBlurImage(const Image *image,MagickCLEnv clEnv,
   3627   const double *kernel,const size_t width,const OffsetInfo *offset,
   3628   ExceptionInfo *exception)
   3629 {
   3630   CacheView
   3631     *filteredImage_view,
   3632     *image_view;
   3633 
   3634   cl_command_queue
   3635     queue;
   3636 
   3637   cl_float4
   3638     biasPixel;
   3639 
   3640   cl_int
   3641     clStatus;
   3642 
   3643   cl_kernel
   3644     motionBlurKernel;
   3645 
   3646   cl_event
   3647     event;
   3648 
   3649   cl_mem
   3650     filteredImageBuffer,
   3651     imageBuffer,
   3652     imageKernelBuffer,
   3653     offsetBuffer;
   3654 
   3655   cl_mem_flags
   3656     mem_flags;
   3657 
   3658   const void
   3659     *inputPixels;
   3660 
   3661   float
   3662     *kernelBufferPtr;
   3663 
   3664   Image
   3665     *filteredImage;
   3666 
   3667   int
   3668     *offsetBufferPtr;
   3669 
   3670   MagickBooleanType
   3671     outputReady;
   3672 
   3673   MagickCLDevice
   3674     device;
   3675 
   3676   PixelInfo
   3677     bias;
   3678 
   3679   MagickSizeType
   3680     length;
   3681 
   3682   size_t
   3683     global_work_size[2],
   3684     local_work_size[2];
   3685 
   3686   unsigned int
   3687     i,
   3688     imageHeight,
   3689     imageWidth,
   3690     matte;
   3691 
   3692   void
   3693     *filteredPixels,
   3694     *hostPtr;
   3695 
   3696   outputReady = MagickFalse;
   3697   filteredImage = NULL;
   3698   filteredImage_view = NULL;
   3699   imageBuffer = NULL;
   3700   filteredImageBuffer = NULL;
   3701   imageKernelBuffer = NULL;
   3702   motionBlurKernel = NULL;
   3703   queue = NULL;
   3704 
   3705   device = RequestOpenCLDevice(clEnv);
   3706 
   3707   /* Create and initialize OpenCL buffers. */
   3708 
   3709   image_view=AcquireAuthenticCacheView(image,exception);
   3710   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   3711   if (inputPixels == (const void *) NULL)
   3712   {
   3713     (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
   3714       "UnableToReadPixelCache.","`%s'",image->filename);
   3715     goto cleanup;
   3716   }
   3717 
   3718   // If the host pointer is aligned to the size of CLPixelPacket,
   3719   // then use the host buffer directly from the GPU; otherwise,
   3720   // create a buffer on the GPU and copy the data over
   3721   if (ALIGNED(inputPixels,CLPixelPacket))
   3722   {
   3723     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   3724   }
   3725   else
   3726   {
   3727     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   3728   }
   3729   // create a CL buffer from image pixel buffer
   3730   length = image->columns * image->rows;
   3731   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
   3732     length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   3733   if (clStatus != CL_SUCCESS)
   3734   {
   3735     (void) ThrowMagickException(exception, GetMagickModule(),
   3736       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
   3737     goto cleanup;
   3738   }
   3739 
   3740 
   3741   filteredImage = CloneImage(image,image->columns,image->rows,
   3742     MagickTrue,exception);
   3743   assert(filteredImage != NULL);
   3744   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
   3745   {
   3746     (void) ThrowMagickException(exception, GetMagickModule(),
   3747       ResourceLimitError, "CloneImage failed.", ".");
   3748     goto cleanup;
   3749   }
   3750   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
   3751   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
   3752   if (filteredPixels == (void *) NULL)
   3753   {
   3754     (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
   3755       "UnableToReadPixelCache.","`%s'",filteredImage->filename);
   3756     goto cleanup;
   3757   }
   3758 
   3759   if (ALIGNED(filteredPixels,CLPixelPacket))
   3760   {
   3761     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
   3762     hostPtr = filteredPixels;
   3763   }
   3764   else
   3765   {
   3766     mem_flags = CL_MEM_WRITE_ONLY;
   3767     hostPtr = NULL;
   3768   }
   3769   // create a CL buffer from image pixel buffer
   3770   length = image->columns * image->rows;
   3771   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
   3772     length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   3773   if (clStatus != CL_SUCCESS)
   3774   {
   3775     (void) ThrowMagickException(exception, GetMagickModule(),
   3776       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
   3777     goto cleanup;
   3778   }
   3779 
   3780 
   3781   imageKernelBuffer = clEnv->library->clCreateBuffer(device->context,
   3782     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
   3783     &clStatus);
   3784   if (clStatus != CL_SUCCESS)
   3785   {
   3786     (void) ThrowMagickException(exception, GetMagickModule(),
   3787       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
   3788     goto cleanup;
   3789   }
   3790 
   3791   queue = AcquireOpenCLCommandQueue(device);
   3792   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
   3793     CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
   3794   if (clStatus != CL_SUCCESS)
   3795   {
   3796     (void) ThrowMagickException(exception, GetMagickModule(),
   3797       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
   3798     goto cleanup;
   3799   }
   3800   for (i = 0; i < width; i++)
   3801   {
   3802     kernelBufferPtr[i] = (float) kernel[i];
   3803   }
   3804   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
   3805     0, NULL, NULL);
   3806  if (clStatus != CL_SUCCESS)
   3807   {
   3808     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
   3809       "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
   3810     goto cleanup;
   3811   }
   3812 
   3813   offsetBuffer = clEnv->library->clCreateBuffer(device->context,
   3814     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
   3815     &clStatus);
   3816   if (clStatus != CL_SUCCESS)
   3817   {
   3818     (void) ThrowMagickException(exception, GetMagickModule(),
   3819       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
   3820     goto cleanup;
   3821   }
   3822 
   3823   offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
   3824     CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
   3825   if (clStatus != CL_SUCCESS)
   3826   {
   3827     (void) ThrowMagickException(exception, GetMagickModule(),
   3828       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
   3829     goto cleanup;
   3830   }
   3831   for (i = 0; i < width; i++)
   3832   {
   3833     offsetBufferPtr[2*i] = (int)offset[i].x;
   3834     offsetBufferPtr[2*i+1] = (int)offset[i].y;
   3835   }
   3836   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
   3837     NULL, NULL);
   3838  if (clStatus != CL_SUCCESS)
   3839   {
   3840     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
   3841       "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
   3842     goto cleanup;
   3843   }
   3844 
   3845 
   3846  // get the OpenCL kernel
   3847   motionBlurKernel = AcquireOpenCLKernel(device,"MotionBlur");
   3848   if (motionBlurKernel == NULL)
   3849   {
   3850     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
   3851       "AcquireOpenCLKernel failed.", ".");
   3852     goto cleanup;
   3853   }
   3854 
   3855   // set the kernel arguments
   3856   i = 0;
   3857   clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
   3858     (void *)&imageBuffer);
   3859   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
   3860     (void *)&filteredImageBuffer);
   3861   imageWidth = (unsigned int) image->columns;
   3862   imageHeight = (unsigned int) image->rows;
   3863   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
   3864     &imageWidth);
   3865   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
   3866     &imageHeight);
   3867   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
   3868     (void *)&imageKernelBuffer);
   3869   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
   3870     &width);
   3871   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
   3872     (void *)&offsetBuffer);
   3873 
   3874   GetPixelInfo(image,&bias);
   3875   biasPixel.s[0] = bias.red;
   3876   biasPixel.s[1] = bias.green;
   3877   biasPixel.s[2] = bias.blue;
   3878   biasPixel.s[3] = bias.alpha;
   3879   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
   3880 
   3881   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
   3882   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
   3883   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
   3884   if (clStatus != CL_SUCCESS)
   3885   {
   3886     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
   3887       "clEnv->library->clSetKernelArg failed.", ".");
   3888     goto cleanup;
   3889   }
   3890 
   3891   // launch the kernel
   3892   local_work_size[0] = 16;
   3893   local_work_size[1] = 16;
   3894   global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
   3895                                 (unsigned int) image->columns,(unsigned int) local_work_size[0]);
   3896   global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
   3897                                 (unsigned int) image->rows,(unsigned int) local_work_size[1]);
   3898   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
   3899 	  global_work_size, local_work_size, 0, NULL, &event);
   3900 
   3901   if (clStatus != CL_SUCCESS)
   3902   {
   3903     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
   3904       "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   3905     goto cleanup;
   3906   }
   3907   RecordProfileData(device,motionBlurKernel,event);
   3908 
   3909   if (ALIGNED(filteredPixels,CLPixelPacket))
   3910   {
   3911     length = image->columns * image->rows;
   3912     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE,
   3913       CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
   3914       NULL, &clStatus);
   3915   }
   3916   else
   3917   {
   3918     length = image->columns * image->rows;
   3919     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0,
   3920       length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   3921   }
   3922   if (clStatus != CL_SUCCESS)
   3923   {
   3924     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
   3925       "Reading output image from CL buffer failed.", ".");
   3926     goto cleanup;
   3927   }
   3928   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
   3929 
   3930 cleanup:
   3931 
   3932   image_view=DestroyCacheView(image_view);
   3933   if (filteredImage_view != NULL)
   3934     filteredImage_view=DestroyCacheView(filteredImage_view);
   3935 
   3936   if (filteredImageBuffer!=NULL)
   3937     clEnv->library->clReleaseMemObject(filteredImageBuffer);
   3938   if (imageBuffer!=NULL)
   3939     clEnv->library->clReleaseMemObject(imageBuffer);
   3940   if (imageKernelBuffer!=NULL)
   3941     clEnv->library->clReleaseMemObject(imageKernelBuffer);
   3942   if (motionBlurKernel!=NULL)
   3943     ReleaseOpenCLKernel(motionBlurKernel);
   3944   if (queue != NULL)
   3945     ReleaseOpenCLCommandQueue(device,queue);
   3946   if (device != NULL)
   3947     ReleaseOpenCLDevice(device);
   3948   if (outputReady == MagickFalse && filteredImage != NULL)
   3949     filteredImage=DestroyImage(filteredImage);
   3950 
   3951   return(filteredImage);
   3952 }
   3953 
   3954 MagickPrivate Image *AccelerateMotionBlurImage(const Image *image,
   3955   const double* kernel,const size_t width,const OffsetInfo *offset,
   3956   ExceptionInfo *exception)
   3957 {
   3958   Image
   3959     *filteredImage;
   3960 
   3961   MagickCLEnv
   3962     clEnv;
   3963 
   3964   assert(image != NULL);
   3965   assert(kernel != (double *) NULL);
   3966   assert(offset != (OffsetInfo *) NULL);
   3967   assert(exception != (ExceptionInfo *) NULL);
   3968 
   3969   if (checkAccelerateConditionRGBA(image) == MagickFalse)
   3970     return((Image *) NULL);
   3971 
   3972   clEnv=getOpenCLEnvironment(exception);
   3973   if (clEnv == (MagickCLEnv) NULL)
   3974     return((Image *) NULL);
   3975 
   3976   filteredImage=ComputeMotionBlurImage(image,clEnv,kernel,width,offset,
   3977     exception);
   3978   return(filteredImage);
   3979 }
   3980 
   3981 /*
   3982 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3983 %                                                                             %
   3984 %                                                                             %
   3985 %                                                                             %
   3986 %     A c c e l e r a t e R e s i z e I m a g e                               %
   3987 %                                                                             %
   3988 %                                                                             %
   3989 %                                                                             %
   3990 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3991 */
   3992 
   3993 static MagickBooleanType resizeHorizontalFilter(MagickCLDevice device,
   3994   cl_command_queue queue,const Image *image,Image *filteredImage,
   3995   cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
   3996   cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
   3997   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
   3998   const float xFactor,ExceptionInfo *exception)
   3999 {
   4000   cl_kernel
   4001     horizontalKernel;
   4002 
   4003   cl_int
   4004     status;
   4005 
   4006   const unsigned int
   4007     workgroupSize = 256;
   4008 
   4009   float
   4010     resizeFilterScale,
   4011     resizeFilterSupport,
   4012     resizeFilterWindowSupport,
   4013     resizeFilterBlur,
   4014     scale,
   4015     support;
   4016 
   4017   int
   4018     cacheRangeStart,
   4019     cacheRangeEnd,
   4020     numCachedPixels,
   4021     resizeFilterType,
   4022     resizeWindowType;
   4023 
   4024   MagickBooleanType
   4025     outputReady;
   4026 
   4027   size_t
   4028     gammaAccumulatorLocalMemorySize,
   4029     gsize[2],
   4030     i,
   4031     imageCacheLocalMemorySize,
   4032     pixelAccumulatorLocalMemorySize,
   4033     lsize[2],
   4034     totalLocalMemorySize,
   4035     weightAccumulatorLocalMemorySize;
   4036 
   4037   unsigned int
   4038     chunkSize,
   4039     pixelPerWorkgroup;
   4040 
   4041   horizontalKernel=NULL;
   4042   outputReady=MagickFalse;
   4043 
   4044   /*
   4045   Apply filter to resize vertically from image to resize image.
   4046   */
   4047   scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
   4048   support=scale*GetResizeFilterSupport(resizeFilter);
   4049   if (support < 0.5)
   4050   {
   4051     /*
   4052     Support too small even for nearest neighbour: Reduce to point
   4053     sampling.
   4054     */
   4055     support=(MagickRealType) 0.5;
   4056     scale=1.0;
   4057   }
   4058   scale=PerceptibleReciprocal(scale);
   4059 
   4060   if (resizedColumns < workgroupSize)
   4061   {
   4062     chunkSize=32;
   4063     pixelPerWorkgroup=32;
   4064   }
   4065   else
   4066   {
   4067     chunkSize=workgroupSize;
   4068     pixelPerWorkgroup=workgroupSize;
   4069   }
   4070 
   4071 DisableMSCWarning(4127)
   4072   while(1)
   4073 RestoreMSCWarning
   4074   {
   4075     /* calculate the local memory size needed per workgroup */
   4076     cacheRangeStart=(int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
   4077     cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+
   4078       MagickEpsilon)+support+0.5);
   4079     numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
   4080     imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
   4081       number_channels;
   4082     totalLocalMemorySize=imageCacheLocalMemorySize;
   4083 
   4084     /* local size for the pixel accumulator */
   4085     pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
   4086     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
   4087 
   4088     /* local memory size for the weight accumulator */
   4089     weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
   4090     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
   4091 
   4092     /* local memory size for the gamma accumulator */
   4093     if ((number_channels == 4) || (number_channels == 2))
   4094       gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
   4095     else
   4096       gammaAccumulatorLocalMemorySize=sizeof(float);
   4097     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
   4098 
   4099     if (totalLocalMemorySize <= device->local_memory_size)
   4100       break;
   4101     else
   4102     {
   4103       pixelPerWorkgroup=pixelPerWorkgroup/2;
   4104       chunkSize=chunkSize/2;
   4105       if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
   4106       {
   4107         /* quit, fallback to CPU */
   4108         goto cleanup;
   4109       }
   4110     }
   4111   }
   4112 
   4113   resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
   4114   resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
   4115 
   4116   horizontalKernel=AcquireOpenCLKernel(device,"ResizeHorizontalFilter");
   4117   if (horizontalKernel == (cl_kernel) NULL)
   4118   {
   4119     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4120       ResourceLimitWarning,"AcquireOpenCLKernel failed.", ".");
   4121     goto cleanup;
   4122   }
   4123 
   4124   resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
   4125   resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
   4126   resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
   4127   resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
   4128 
   4129   i=0;
   4130   status =SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
   4131   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
   4132   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&columns);
   4133   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&rows);
   4134   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
   4135   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
   4136   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
   4137   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&xFactor);
   4138   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeFilterType);
   4139   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeWindowType);
   4140   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
   4141   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
   4142   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
   4143   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
   4144   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
   4145   status|=SetOpenCLKernelArg(horizontalKernel,i++,imageCacheLocalMemorySize,NULL);
   4146   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),&numCachedPixels);
   4147   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&pixelPerWorkgroup);
   4148   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&chunkSize);
   4149   status|=SetOpenCLKernelArg(horizontalKernel,i++,pixelAccumulatorLocalMemorySize,NULL);
   4150   status|=SetOpenCLKernelArg(horizontalKernel,i++,weightAccumulatorLocalMemorySize,NULL);
   4151   status|=SetOpenCLKernelArg(horizontalKernel,i++,gammaAccumulatorLocalMemorySize,NULL);
   4152 
   4153   if (status != CL_SUCCESS)
   4154   {
   4155     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4156       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
   4157     goto cleanup;
   4158   }
   4159 
   4160   gsize[0]=(resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*
   4161     workgroupSize;
   4162   gsize[1]=resizedRows;
   4163   lsize[0]=workgroupSize;
   4164   lsize[1]=1;
   4165   outputReady=EnqueueOpenCLKernel(queue,horizontalKernel,2,
   4166     (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
   4167 cleanup:
   4168 
   4169   if (horizontalKernel != (cl_kernel) NULL)
   4170     ReleaseOpenCLKernel(horizontalKernel);
   4171 
   4172   return(outputReady);
   4173 }
   4174 
   4175 static MagickBooleanType resizeVerticalFilter(MagickCLDevice device,
   4176   cl_command_queue queue,const Image *image,Image * filteredImage,
   4177   cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
   4178   cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
   4179   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
   4180   const float yFactor,ExceptionInfo *exception)
   4181 {
   4182   cl_kernel
   4183     verticalKernel;
   4184 
   4185   cl_int
   4186     status;
   4187 
   4188   const unsigned int
   4189     workgroupSize = 256;
   4190 
   4191   float
   4192     resizeFilterScale,
   4193     resizeFilterSupport,
   4194     resizeFilterWindowSupport,
   4195     resizeFilterBlur,
   4196     scale,
   4197     support;
   4198 
   4199   int
   4200     cacheRangeStart,
   4201     cacheRangeEnd,
   4202     numCachedPixels,
   4203     resizeFilterType,
   4204     resizeWindowType;
   4205 
   4206   MagickBooleanType
   4207     outputReady;
   4208 
   4209   size_t
   4210     gammaAccumulatorLocalMemorySize,
   4211     gsize[2],
   4212     i,
   4213     imageCacheLocalMemorySize,
   4214     pixelAccumulatorLocalMemorySize,
   4215     lsize[2],
   4216     totalLocalMemorySize,
   4217     weightAccumulatorLocalMemorySize;
   4218 
   4219   unsigned int
   4220     chunkSize,
   4221     pixelPerWorkgroup;
   4222 
   4223   verticalKernel=NULL;
   4224   outputReady=MagickFalse;
   4225 
   4226   /*
   4227   Apply filter to resize vertically from image to resize image.
   4228   */
   4229   scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
   4230   support=scale*GetResizeFilterSupport(resizeFilter);
   4231   if (support < 0.5)
   4232   {
   4233     /*
   4234     Support too small even for nearest neighbour: Reduce to point
   4235     sampling.
   4236     */
   4237     support=(MagickRealType) 0.5;
   4238     scale=1.0;
   4239   }
   4240   scale=PerceptibleReciprocal(scale);
   4241 
   4242   if (resizedRows < workgroupSize)
   4243   {
   4244     chunkSize=32;
   4245     pixelPerWorkgroup=32;
   4246   }
   4247   else
   4248   {
   4249     chunkSize=workgroupSize;
   4250     pixelPerWorkgroup=workgroupSize;
   4251   }
   4252 
   4253 DisableMSCWarning(4127)
   4254   while(1)
   4255 RestoreMSCWarning
   4256   {
   4257     /* calculate the local memory size needed per workgroup */
   4258     cacheRangeStart=(int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
   4259     cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+
   4260       MagickEpsilon)+support+0.5);
   4261     numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
   4262     imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
   4263       number_channels;
   4264     totalLocalMemorySize=imageCacheLocalMemorySize;
   4265 
   4266     /* local size for the pixel accumulator */
   4267     pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
   4268     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
   4269 
   4270     /* local memory size for the weight accumulator */
   4271     weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
   4272     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
   4273 
   4274     /* local memory size for the gamma accumulator */
   4275     if ((number_channels == 4) || (number_channels == 2))
   4276       gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
   4277     else
   4278       gammaAccumulatorLocalMemorySize=sizeof(float);
   4279     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
   4280 
   4281     if (totalLocalMemorySize <= device->local_memory_size)
   4282       break;
   4283     else
   4284     {
   4285       pixelPerWorkgroup=pixelPerWorkgroup/2;
   4286       chunkSize=chunkSize/2;
   4287       if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
   4288       {
   4289         /* quit, fallback to CPU */
   4290         goto cleanup;
   4291       }
   4292     }
   4293   }
   4294 
   4295   resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
   4296   resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
   4297 
   4298   verticalKernel=AcquireOpenCLKernel(device,"ResizeVerticalFilter");
   4299   if (verticalKernel == (cl_kernel) NULL)
   4300   {
   4301     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4302       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   4303     goto cleanup;
   4304   }
   4305 
   4306   resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
   4307   resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
   4308   resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
   4309   resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
   4310 
   4311   i=0;
   4312   status =SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
   4313   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
   4314   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&columns);
   4315   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&rows);
   4316   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
   4317   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
   4318   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
   4319   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&yFactor);
   4320   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeFilterType);
   4321   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeWindowType);
   4322   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
   4323   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
   4324   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
   4325   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
   4326   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
   4327   status|=SetOpenCLKernelArg(verticalKernel,i++,imageCacheLocalMemorySize, NULL);
   4328   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int), &numCachedPixels);
   4329   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &pixelPerWorkgroup);
   4330   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &chunkSize);
   4331   status|=SetOpenCLKernelArg(verticalKernel,i++,pixelAccumulatorLocalMemorySize, NULL);
   4332   status|=SetOpenCLKernelArg(verticalKernel,i++,weightAccumulatorLocalMemorySize, NULL);
   4333   status|=SetOpenCLKernelArg(verticalKernel,i++,gammaAccumulatorLocalMemorySize, NULL);
   4334 
   4335   if (status != CL_SUCCESS)
   4336   {
   4337     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4338       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
   4339     goto cleanup;
   4340   }
   4341 
   4342   gsize[0]=resizedColumns;
   4343   gsize[1]=(resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*
   4344     workgroupSize;
   4345   lsize[0]=1;
   4346   lsize[1]=workgroupSize;
   4347   outputReady=EnqueueOpenCLKernel(queue,verticalKernel,2,(const size_t *) NULL,
   4348     gsize,lsize,image,filteredImage,exception);
   4349 
   4350 cleanup:
   4351 
   4352   if (verticalKernel != (cl_kernel) NULL)
   4353     ReleaseOpenCLKernel(verticalKernel);
   4354 
   4355   return(outputReady);
   4356 }
   4357 
   4358 static Image *ComputeResizeImage(const Image* image,MagickCLEnv clEnv,
   4359   const size_t resizedColumns,const size_t resizedRows,
   4360   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
   4361 {
   4362   cl_command_queue
   4363     queue;
   4364 
   4365   cl_mem
   4366     cubicCoefficientsBuffer,
   4367     filteredImageBuffer,
   4368     imageBuffer,
   4369     tempImageBuffer;
   4370 
   4371   cl_uint
   4372     number_channels;
   4373 
   4374   const double
   4375     *resizeFilterCoefficient;
   4376 
   4377   float
   4378     coefficientBuffer[7],
   4379     xFactor,
   4380     yFactor;
   4381 
   4382   MagickBooleanType
   4383     outputReady;
   4384 
   4385   MagickCLDevice
   4386     device;
   4387 
   4388   MagickSizeType
   4389     length;
   4390 
   4391   Image
   4392     *filteredImage;
   4393 
   4394   size_t
   4395     i;
   4396 
   4397   filteredImage=NULL;
   4398   tempImageBuffer=NULL;
   4399   cubicCoefficientsBuffer=NULL;
   4400   outputReady=MagickFalse;
   4401 
   4402   device=RequestOpenCLDevice(clEnv);
   4403   queue=AcquireOpenCLCommandQueue(device);
   4404   filteredImage=CloneImage(image,resizedColumns,resizedRows,MagickTrue,
   4405     exception);
   4406   if (filteredImage == (Image *) NULL)
   4407     goto cleanup;
   4408   if (filteredImage->number_channels != image->number_channels)
   4409     goto cleanup;
   4410   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   4411   if (imageBuffer == (cl_mem) NULL)
   4412     goto cleanup;
   4413   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
   4414   if (filteredImageBuffer == (cl_mem) NULL)
   4415     goto cleanup;
   4416 
   4417   resizeFilterCoefficient=GetResizeFilterCoefficient(resizeFilter);
   4418   for (i = 0; i < 7; i++)
   4419     coefficientBuffer[i]=(float) resizeFilterCoefficient[i];
   4420   cubicCoefficientsBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
   4421     CL_MEM_READ_ONLY,7*sizeof(*resizeFilterCoefficient),&coefficientBuffer);
   4422   if (cubicCoefficientsBuffer == (cl_mem) NULL)
   4423   {
   4424     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4425       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
   4426     goto cleanup;
   4427   }
   4428 
   4429   number_channels=(cl_uint) image->number_channels;
   4430   xFactor=(float) resizedColumns/(float) image->columns;
   4431   yFactor=(float) resizedRows/(float) image->rows;
   4432   if (xFactor > yFactor)
   4433   {
   4434     length=resizedColumns*image->rows*number_channels;
   4435     tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
   4436       sizeof(CLQuantum),(void *) NULL);
   4437     if (tempImageBuffer == (cl_mem) NULL)
   4438     {
   4439       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4440         ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
   4441       goto cleanup;
   4442     }
   4443 
   4444     outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
   4445       imageBuffer,number_channels,(cl_uint) image->columns,
   4446       (cl_uint) image->rows,tempImageBuffer,(cl_uint) resizedColumns,
   4447       (cl_uint) image->rows,resizeFilter,cubicCoefficientsBuffer,xFactor,
   4448       exception);
   4449     if (outputReady == MagickFalse)
   4450       goto cleanup;
   4451 
   4452     outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
   4453       tempImageBuffer,number_channels,(cl_uint) resizedColumns,
   4454       (cl_uint) image->rows,filteredImageBuffer,(cl_uint) resizedColumns,
   4455       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
   4456       exception);
   4457     if (outputReady == MagickFalse)
   4458       goto cleanup;
   4459   }
   4460   else
   4461   {
   4462     length=image->columns*resizedRows*number_channels;
   4463     tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
   4464       sizeof(CLQuantum),(void *) NULL);
   4465     if (tempImageBuffer == (cl_mem) NULL)
   4466     {
   4467       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4468         ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
   4469       goto cleanup;
   4470     }
   4471 
   4472     outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
   4473       imageBuffer,number_channels,(cl_uint) image->columns,
   4474       (cl_int) image->rows,tempImageBuffer,(cl_uint) image->columns,
   4475       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
   4476       exception);
   4477     if (outputReady == MagickFalse)
   4478       goto cleanup;
   4479 
   4480     outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
   4481       tempImageBuffer,number_channels,(cl_uint) image->columns,
   4482       (cl_uint) resizedRows,filteredImageBuffer,(cl_uint) resizedColumns,
   4483       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,xFactor,
   4484       exception);
   4485     if (outputReady == MagickFalse)
   4486       goto cleanup;
   4487   }
   4488 
   4489 cleanup:
   4490 
   4491   if (tempImageBuffer != (cl_mem) NULL)
   4492     ReleaseOpenCLMemObject(tempImageBuffer);
   4493   if (cubicCoefficientsBuffer != (cl_mem) NULL)
   4494     ReleaseOpenCLMemObject(cubicCoefficientsBuffer);
   4495   if (queue != (cl_command_queue) NULL)
   4496     ReleaseOpenCLCommandQueue(device,queue);
   4497   if (device != (MagickCLDevice) NULL)
   4498     ReleaseOpenCLDevice(device);
   4499   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
   4500     filteredImage=DestroyImage(filteredImage);
   4501 
   4502   return(filteredImage);
   4503 }
   4504 
   4505 static MagickBooleanType gpuSupportedResizeWeighting(
   4506   ResizeWeightingFunctionType f)
   4507 {
   4508   unsigned int
   4509     i;
   4510 
   4511   for (i = 0; ;i++)
   4512   {
   4513     if (supportedResizeWeighting[i] == LastWeightingFunction)
   4514       break;
   4515     if (supportedResizeWeighting[i] == f)
   4516       return(MagickTrue);
   4517   }
   4518   return(MagickFalse);
   4519 }
   4520 
   4521 MagickPrivate Image *AccelerateResizeImage(const Image *image,
   4522   const size_t resizedColumns,const size_t resizedRows,
   4523   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
   4524 {
   4525   Image
   4526     *filteredImage;
   4527 
   4528   MagickCLEnv
   4529     clEnv;
   4530 
   4531   assert(image != NULL);
   4532   assert(exception != (ExceptionInfo *) NULL);
   4533 
   4534   if (checkAccelerateCondition(image) == MagickFalse)
   4535     return((Image *) NULL);
   4536 
   4537   if ((gpuSupportedResizeWeighting(GetResizeFilterWeightingType(
   4538          resizeFilter)) == MagickFalse) ||
   4539       (gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(
   4540          resizeFilter)) == MagickFalse))
   4541     return((Image *) NULL);
   4542 
   4543   clEnv=getOpenCLEnvironment(exception);
   4544   if (clEnv == (MagickCLEnv) NULL)
   4545     return((Image *) NULL);
   4546 
   4547   filteredImage=ComputeResizeImage(image,clEnv,resizedColumns,resizedRows,
   4548     resizeFilter,exception);
   4549   return(filteredImage);
   4550 }
   4551 
   4552 /*
   4553 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   4554 %                                                                             %
   4555 %                                                                             %
   4556 %                                                                             %
   4557 %     A c c e l e r a t e R o t a t i o n a l B l u r I m a g e               %
   4558 %                                                                             %
   4559 %                                                                             %
   4560 %                                                                             %
   4561 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   4562 */
   4563 
   4564 static Image* ComputeRotationalBlurImage(const Image *image,MagickCLEnv clEnv,
   4565   const double angle,ExceptionInfo *exception)
   4566 {
   4567   cl_command_queue
   4568     queue;
   4569 
   4570   cl_float2
   4571     blurCenter;
   4572 
   4573   cl_int
   4574     status;
   4575 
   4576   cl_mem
   4577     cosThetaBuffer,
   4578     filteredImageBuffer,
   4579     imageBuffer,
   4580     sinThetaBuffer;
   4581 
   4582   cl_kernel
   4583     rotationalBlurKernel;
   4584 
   4585   cl_uint
   4586     cossin_theta_size,
   4587     number_channels;
   4588 
   4589   float
   4590     blurRadius,
   4591     *cosThetaPtr,
   4592     offset,
   4593     *sinThetaPtr,
   4594     theta;
   4595 
   4596   Image
   4597     *filteredImage;
   4598 
   4599   MagickBooleanType
   4600     outputReady;
   4601 
   4602   MagickCLDevice
   4603     device;
   4604 
   4605   size_t
   4606     gsize[2],
   4607     i;
   4608 
   4609   filteredImage=NULL;
   4610   sinThetaBuffer=NULL;
   4611   cosThetaBuffer=NULL;
   4612   rotationalBlurKernel=NULL;
   4613   outputReady=MagickFalse;
   4614 
   4615   device=RequestOpenCLDevice(clEnv);
   4616   queue=AcquireOpenCLCommandQueue(device);
   4617   filteredImage=cloneImage(image,exception);
   4618   if (filteredImage == (Image *) NULL)
   4619     goto cleanup;
   4620   if (filteredImage->number_channels != image->number_channels)
   4621     goto cleanup;
   4622   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   4623   if (imageBuffer == (cl_mem) NULL)
   4624     goto cleanup;
   4625   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
   4626   if (filteredImageBuffer == (cl_mem) NULL)
   4627     goto cleanup;
   4628 
   4629   blurCenter.x=(float) (image->columns-1)/2.0;
   4630   blurCenter.y=(float) (image->rows-1)/2.0;
   4631   blurRadius=hypot(blurCenter.x,blurCenter.y);
   4632   cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt(
   4633     (double) blurRadius)+2UL);
   4634 
   4635   cosThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
   4636   if (cosThetaPtr == (float *) NULL)
   4637     goto cleanup;
   4638   sinThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
   4639   if (sinThetaPtr == (float *) NULL)
   4640   {
   4641     cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
   4642     goto cleanup;
   4643   }
   4644 
   4645   theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
   4646   offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
   4647   for (i=0; i < (ssize_t) cossin_theta_size; i++)
   4648   {
   4649     cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
   4650     sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
   4651   }
   4652 
   4653   sinThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
   4654     CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),sinThetaPtr);
   4655   sinThetaPtr=RelinquishMagickMemory(sinThetaPtr);
   4656   cosThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
   4657     CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),cosThetaPtr);
   4658   cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
   4659   if ((sinThetaBuffer == (cl_mem) NULL) || (cosThetaBuffer == (cl_mem) NULL))
   4660   {
   4661     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4662       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
   4663     goto cleanup;
   4664   }
   4665 
   4666   rotationalBlurKernel=AcquireOpenCLKernel(device,"RotationalBlur");
   4667   if (rotationalBlurKernel == (cl_kernel) NULL)
   4668   {
   4669     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4670       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   4671     goto cleanup;
   4672   }
   4673 
   4674   number_channels=(cl_uint) image->number_channels;
   4675 
   4676   i=0;
   4677   status =SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   4678   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint),&number_channels);
   4679   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
   4680   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
   4681   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
   4682   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
   4683   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint), &cossin_theta_size);
   4684   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   4685   if (status != CL_SUCCESS)
   4686   {
   4687     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4688       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
   4689     goto cleanup;
   4690   }
   4691 
   4692   gsize[0]=image->columns;
   4693   gsize[1]=image->rows;
   4694   outputReady=EnqueueOpenCLKernel(queue,rotationalBlurKernel,2,
   4695     (const size_t *) NULL,gsize,(const size_t *) NULL,image,filteredImage,
   4696     exception);
   4697 
   4698 cleanup:
   4699 
   4700   if (sinThetaBuffer != (cl_mem) NULL)
   4701     ReleaseOpenCLMemObject(sinThetaBuffer);
   4702   if (cosThetaBuffer != (cl_mem) NULL)
   4703     ReleaseOpenCLMemObject(cosThetaBuffer);
   4704   if (rotationalBlurKernel != (cl_kernel) NULL)
   4705     ReleaseOpenCLKernel(rotationalBlurKernel);
   4706   if (queue != (cl_command_queue) NULL)
   4707     ReleaseOpenCLCommandQueue(device,queue);
   4708   if (device != (MagickCLDevice) NULL)
   4709     ReleaseOpenCLDevice(device);
   4710   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
   4711     filteredImage=DestroyImage(filteredImage);
   4712 
   4713   return(filteredImage);
   4714 }
   4715 
   4716 MagickPrivate Image* AccelerateRotationalBlurImage(const Image *image,
   4717   const double angle,ExceptionInfo *exception)
   4718 {
   4719   Image
   4720     *filteredImage;
   4721 
   4722   MagickCLEnv
   4723     clEnv;
   4724 
   4725   assert(image != NULL);
   4726   assert(exception != (ExceptionInfo *) NULL);
   4727 
   4728   if (checkAccelerateCondition(image) == MagickFalse)
   4729     return((Image *) NULL);
   4730 
   4731   clEnv=getOpenCLEnvironment(exception);
   4732   if (clEnv == (MagickCLEnv) NULL)
   4733     return((Image *) NULL);
   4734 
   4735   filteredImage=ComputeRotationalBlurImage(image,clEnv,angle,exception);
   4736   return filteredImage;
   4737 }
   4738 
   4739 /*
   4740 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   4741 %                                                                             %
   4742 %                                                                             %
   4743 %                                                                             %
   4744 %     A c c e l e r a t e U n s h a r p M a s k I m a g e                     %
   4745 %                                                                             %
   4746 %                                                                             %
   4747 %                                                                             %
   4748 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   4749 */
   4750 
   4751 static Image *ComputeUnsharpMaskImage(const Image *image,MagickCLEnv clEnv,
   4752   const double radius,const double sigma,const double gain,
   4753   const double threshold,ExceptionInfo *exception)
   4754 {
   4755   cl_command_queue
   4756     queue;
   4757 
   4758   cl_int
   4759     status;
   4760 
   4761   cl_kernel
   4762     blurRowKernel,
   4763     unsharpMaskBlurColumnKernel;
   4764 
   4765   cl_mem
   4766     filteredImageBuffer,
   4767     imageBuffer,
   4768     imageKernelBuffer,
   4769     tempImageBuffer;
   4770 
   4771   cl_uint
   4772     imageColumns,
   4773     imageRows,
   4774     kernelWidth,
   4775     number_channels;
   4776 
   4777   float
   4778     fGain,
   4779     fThreshold;
   4780 
   4781   Image
   4782     *filteredImage;
   4783 
   4784   int
   4785     chunkSize;
   4786 
   4787   MagickBooleanType
   4788     outputReady;
   4789 
   4790   MagickCLDevice
   4791     device;
   4792 
   4793   MagickSizeType
   4794     length;
   4795 
   4796   size_t
   4797     gsize[2],
   4798     i,
   4799     lsize[2];
   4800 
   4801   filteredImage=NULL;
   4802   tempImageBuffer=NULL;
   4803   imageKernelBuffer=NULL;
   4804   blurRowKernel=NULL;
   4805   unsharpMaskBlurColumnKernel=NULL;
   4806   outputReady=MagickFalse;
   4807 
   4808   device=RequestOpenCLDevice(clEnv);
   4809   queue=AcquireOpenCLCommandQueue(device);
   4810   filteredImage=cloneImage(image,exception);
   4811   if (filteredImage == (Image *) NULL)
   4812     goto cleanup;
   4813   if (filteredImage->number_channels != image->number_channels)
   4814     goto cleanup;
   4815   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   4816   if (imageBuffer == (cl_mem) NULL)
   4817     goto cleanup;
   4818   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
   4819   if (filteredImageBuffer == (cl_mem) NULL)
   4820     goto cleanup;
   4821 
   4822   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
   4823     exception);
   4824 
   4825   length=image->columns*image->rows;
   4826   tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
   4827     sizeof(cl_float4),NULL);
   4828   if (tempImageBuffer == (cl_mem) NULL)
   4829   {
   4830     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4831       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
   4832     goto cleanup;
   4833   }
   4834 
   4835   blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
   4836   if (blurRowKernel == (cl_kernel) NULL)
   4837   {
   4838     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4839       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   4840     goto cleanup;
   4841   }
   4842 
   4843   unsharpMaskBlurColumnKernel=AcquireOpenCLKernel(device,
   4844     "UnsharpMaskBlurColumn");
   4845   if (unsharpMaskBlurColumnKernel == (cl_kernel) NULL)
   4846   {
   4847     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4848       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   4849     goto cleanup;
   4850   }
   4851 
   4852   number_channels=(cl_uint) image->number_channels;
   4853   imageColumns=(cl_uint) image->columns;
   4854   imageRows=(cl_uint) image->rows;
   4855 
   4856   chunkSize = 256;
   4857 
   4858   i=0;
   4859   status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   4860   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
   4861   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
   4862   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
   4863   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
   4864   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
   4865   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
   4866   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
   4867   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
   4868   if (status != CL_SUCCESS)
   4869   {
   4870     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4871       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
   4872     goto cleanup;
   4873   }
   4874 
   4875   gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
   4876   gsize[1]=image->rows;
   4877   lsize[0]=chunkSize;
   4878   lsize[1]=1;
   4879   outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,
   4880     (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
   4881 
   4882   chunkSize=256;
   4883   fGain=(float) gain;
   4884   fThreshold=(float) threshold;
   4885 
   4886   i=0;
   4887   status =SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   4888   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
   4889   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),&number_channels);
   4890   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
   4891   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
   4892   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
   4893   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,(chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
   4894   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,kernelWidth*sizeof(float),NULL);
   4895   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
   4896   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
   4897   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
   4898   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
   4899   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   4900   if (status != CL_SUCCESS)
   4901   {
   4902     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4903       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
   4904     goto cleanup;
   4905   }
   4906 
   4907   gsize[0]=image->columns;
   4908   gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
   4909   lsize[0]=1;
   4910   lsize[1]=chunkSize;
   4911   outputReady=EnqueueOpenCLKernel(queue,unsharpMaskBlurColumnKernel,2,
   4912     (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
   4913 
   4914 cleanup:
   4915 
   4916   if (tempImageBuffer != (cl_mem) NULL)
   4917     ReleaseOpenCLMemObject(tempImageBuffer);
   4918   if (imageKernelBuffer != (cl_mem) NULL)
   4919     ReleaseOpenCLMemObject(imageKernelBuffer);
   4920   if (blurRowKernel != (cl_kernel) NULL)
   4921     ReleaseOpenCLKernel(blurRowKernel);
   4922   if (unsharpMaskBlurColumnKernel != (cl_kernel) NULL)
   4923     ReleaseOpenCLKernel(unsharpMaskBlurColumnKernel);
   4924   if (queue != (cl_command_queue) NULL)
   4925     ReleaseOpenCLCommandQueue(device,queue);
   4926   if (device != (MagickCLDevice) NULL)
   4927     ReleaseOpenCLDevice(device);
   4928   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
   4929     filteredImage=DestroyImage(filteredImage);
   4930 
   4931   return(filteredImage);
   4932 }
   4933 
   4934 static Image *ComputeUnsharpMaskImageSingle(const Image *image,
   4935   MagickCLEnv clEnv,const double radius,const double sigma,const double gain,
   4936   const double threshold,ExceptionInfo *exception)
   4937 {
   4938   cl_command_queue
   4939     queue;
   4940 
   4941   cl_int
   4942     status;
   4943 
   4944   cl_kernel
   4945     unsharpMaskKernel;
   4946 
   4947   cl_mem
   4948     filteredImageBuffer,
   4949     imageBuffer,
   4950     imageKernelBuffer;
   4951 
   4952   cl_uint
   4953     imageColumns,
   4954     imageRows,
   4955     kernelWidth,
   4956     number_channels;
   4957 
   4958   float
   4959     fGain,
   4960     fThreshold;
   4961 
   4962   Image
   4963     *filteredImage;
   4964 
   4965   MagickBooleanType
   4966     outputReady;
   4967 
   4968   MagickCLDevice
   4969     device;
   4970 
   4971   size_t
   4972     gsize[2],
   4973     i,
   4974     lsize[2];
   4975 
   4976   filteredImage=NULL;
   4977   imageKernelBuffer=NULL;
   4978   unsharpMaskKernel=NULL;
   4979   outputReady=MagickFalse;
   4980 
   4981   device=RequestOpenCLDevice(clEnv);
   4982   queue=AcquireOpenCLCommandQueue(device);
   4983   filteredImage=cloneImage(image,exception);
   4984   if (filteredImage == (Image *) NULL)
   4985     goto cleanup;
   4986   if (filteredImage->number_channels != image->number_channels)
   4987     goto cleanup;
   4988   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   4989   if (imageBuffer == (cl_mem) NULL)
   4990     goto cleanup;
   4991   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
   4992   if (filteredImageBuffer == (cl_mem) NULL)
   4993     goto cleanup;
   4994 
   4995   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
   4996     exception);
   4997 
   4998   unsharpMaskKernel=AcquireOpenCLKernel(device,"UnsharpMask");
   4999   if (unsharpMaskKernel == NULL)
   5000   {
   5001     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   5002       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   5003     goto cleanup;
   5004   }
   5005 
   5006   imageColumns=(cl_uint) image->columns;
   5007   imageRows=(cl_uint) image->rows;
   5008   number_channels=(cl_uint) image->number_channels;
   5009   fGain=(float) gain;
   5010   fThreshold=(float) threshold;
   5011 
   5012   i=0;
   5013   status =SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   5014   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&number_channels);
   5015   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
   5016   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
   5017   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
   5018   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
   5019   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageRows);
   5020   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernelWidth)),(void *) NULL);
   5021   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
   5022   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
   5023   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   5024   if (status != CL_SUCCESS)
   5025   {
   5026     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   5027       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
   5028     goto cleanup;
   5029   }
   5030 
   5031   gsize[0]=((image->columns + 7) / 8)*8;
   5032   gsize[1]=((image->rows + 31) / 32)*32;
   5033   lsize[0]=8;
   5034   lsize[1]=32;
   5035   outputReady=EnqueueOpenCLKernel(queue,unsharpMaskKernel,2,(const size_t *) NULL,
   5036     gsize,lsize,image,filteredImage,exception);
   5037 
   5038 cleanup:
   5039 
   5040   if (imageKernelBuffer != (cl_mem) NULL)
   5041     ReleaseOpenCLMemObject(imageKernelBuffer);
   5042   if (unsharpMaskKernel != (cl_kernel) NULL)
   5043     ReleaseOpenCLKernel(unsharpMaskKernel);
   5044   if (queue != (cl_command_queue) NULL)
   5045     ReleaseOpenCLCommandQueue(device,queue);
   5046   if (device != (MagickCLDevice) NULL)
   5047     ReleaseOpenCLDevice(device);
   5048   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
   5049     filteredImage=DestroyImage(filteredImage);
   5050 
   5051   return(filteredImage);
   5052 }
   5053 
   5054 MagickPrivate Image *AccelerateUnsharpMaskImage(const Image *image,
   5055   const double radius,const double sigma,const double gain,
   5056   const double threshold,ExceptionInfo *exception)
   5057 {
   5058   Image
   5059     *filteredImage;
   5060 
   5061   MagickCLEnv
   5062     clEnv;
   5063 
   5064   assert(image != NULL);
   5065   assert(exception != (ExceptionInfo *) NULL);
   5066 
   5067   if (checkAccelerateCondition(image) == MagickFalse)
   5068     return((Image *) NULL);
   5069 
   5070   clEnv=getOpenCLEnvironment(exception);
   5071   if (clEnv == (MagickCLEnv) NULL)
   5072     return((Image *) NULL);
   5073 
   5074   if (radius < 12.1)
   5075     filteredImage=ComputeUnsharpMaskImageSingle(image,clEnv,radius,sigma,gain,
   5076       threshold,exception);
   5077   else
   5078     filteredImage=ComputeUnsharpMaskImage(image,clEnv,radius,sigma,gain,
   5079       threshold,exception);
   5080   return(filteredImage);
   5081 }
   5082 
   5083 static Image *ComputeWaveletDenoiseImage(const Image *image,MagickCLEnv clEnv,
   5084   const double threshold,ExceptionInfo *exception)
   5085 {
   5086   cl_command_queue
   5087     queue;
   5088 
   5089   const cl_int
   5090     PASSES=5;
   5091 
   5092   const int
   5093     TILESIZE=64,
   5094     PAD=1<<(PASSES-1),
   5095     SIZE=TILESIZE-2*PAD;
   5096 
   5097   cl_float
   5098     thresh;
   5099 
   5100   cl_int
   5101     status;
   5102 
   5103   cl_kernel
   5104     denoiseKernel;
   5105 
   5106   cl_mem
   5107     filteredImageBuffer,
   5108     imageBuffer;
   5109 
   5110   cl_uint
   5111     number_channels,
   5112     width,
   5113     height,
   5114     max_channels;
   5115 
   5116   Image
   5117     *filteredImage;
   5118 
   5119   MagickBooleanType
   5120     outputReady;
   5121 
   5122   MagickCLDevice
   5123     device;
   5124 
   5125   size_t
   5126     gsize[2],
   5127     i,
   5128     lsize[2];
   5129 
   5130   filteredImage=NULL;
   5131   denoiseKernel=NULL;
   5132   outputReady=MagickFalse;
   5133 
   5134   device=RequestOpenCLDevice(clEnv);
   5135   queue=AcquireOpenCLCommandQueue(device);
   5136   filteredImage=CloneImage(image,image->columns,image->rows,MagickTrue,
   5137     exception);
   5138   if (filteredImage == (Image *) NULL)
   5139     goto cleanup;
   5140   if (filteredImage->number_channels != image->number_channels)
   5141     goto cleanup;
   5142   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   5143   if (imageBuffer == (cl_mem) NULL)
   5144     goto cleanup;
   5145   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
   5146   if (filteredImageBuffer == (cl_mem) NULL)
   5147     goto cleanup;
   5148 
   5149   denoiseKernel=AcquireOpenCLKernel(device,"WaveletDenoise");
   5150   if (denoiseKernel == (cl_kernel) NULL)
   5151   {
   5152     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   5153       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   5154     goto cleanup;
   5155   }
   5156 
   5157   number_channels=(cl_uint)image->number_channels;
   5158   width=(cl_uint)image->columns;
   5159   height=(cl_uint)image->rows;
   5160   max_channels=number_channels;
   5161   if ((max_channels == 4) || (max_channels == 2))
   5162     max_channels=max_channels-1;
   5163   thresh=threshold;
   5164 
   5165   i=0;
   5166   status =SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   5167   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   5168   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
   5169   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&max_channels);
   5170   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_float),(void *)&thresh);
   5171   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_int),(void *)&PASSES);
   5172   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&width);
   5173   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&height);
   5174   if (status != CL_SUCCESS)
   5175   {
   5176     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   5177       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
   5178     goto cleanup;
   5179   }
   5180 
   5181   gsize[0]=((width+(SIZE-1))/SIZE)*TILESIZE;
   5182   gsize[1]=((height+(SIZE-1))/SIZE)*4;
   5183   lsize[0]=TILESIZE;
   5184   lsize[1]=4;
   5185   outputReady=EnqueueOpenCLKernel(queue,denoiseKernel,2,(const size_t *) NULL,
   5186     gsize,lsize,image,filteredImage,exception);
   5187 
   5188 cleanup:
   5189 
   5190   if (denoiseKernel != (cl_kernel) NULL)
   5191     ReleaseOpenCLKernel(denoiseKernel);
   5192   if (queue != (cl_command_queue) NULL)
   5193     ReleaseOpenCLCommandQueue(device,queue);
   5194   if (device != (MagickCLDevice) NULL)
   5195     ReleaseOpenCLDevice(device);
   5196   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
   5197     filteredImage=DestroyImage(filteredImage);
   5198 
   5199   return(filteredImage);
   5200 }
   5201 
   5202 MagickPrivate Image *AccelerateWaveletDenoiseImage(const Image *image,
   5203   const double threshold,ExceptionInfo *exception)
   5204 {
   5205   Image
   5206     *filteredImage;
   5207 
   5208   MagickCLEnv
   5209     clEnv;
   5210 
   5211   assert(image != NULL);
   5212   assert(exception != (ExceptionInfo *)NULL);
   5213 
   5214   if (checkAccelerateCondition(image) == MagickFalse)
   5215     return((Image *) NULL);
   5216 
   5217   clEnv=getOpenCLEnvironment(exception);
   5218   if (clEnv == (MagickCLEnv) NULL)
   5219     return((Image *) NULL);
   5220 
   5221   filteredImage=ComputeWaveletDenoiseImage(image,clEnv,threshold,exception);
   5222 
   5223   return(filteredImage);
   5224 }
   5225 #endif /* MAGICKCORE_OPENCL_SUPPORT */
   5226