Home | History | Annotate | Download | only in MagickCore
      1 /*
      2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      3 %                                                                             %
      4 %                                                                             %
      5 %                                                                             %
      6 %     AAA     CCCC    CCCC  EEEEE  L      EEEEE  RRRR    AAA   TTTTT  EEEEE   %
      7 %    A   A   C       C      E      L      E      R   R  A   A    T    E       %
      8 %    AAAAA   C       C      EEE    L      EEE    RRRR   AAAAA    T    EEE     %
      9 %    A   A   C       C      E      L      E      R R    A   A    T    E       %
     10 %    A   A    CCCC    CCCC  EEEEE  LLLLL  EEEEE  R  R   A   A    T    EEEEE   %
     11 %                                                                             %
     12 %                                                                             %
     13 %                       MagickCore Acceleration Methods                       %
     14 %                                                                             %
     15 %                              Software Design                                %
     16 %                                  Cristy                                     %
     17 %                               SiuChi Chan                                   %
     18 %                              Guansong Zhang                                 %
     19 %                               January 2010                                  %
     20 %                               Dirk Lemstra                                  %
     21 %                                April 2016                                   %
     22 %                                                                             %
     23 %                                                                             %
     24 %  Copyright 1999-2019 ImageMagick Studio LLC, a non-profit organization      %
     25 %  dedicated to making software imaging solutions freely available.           %
     26 %                                                                             %
     27 %  You may not use this file except in compliance with the License.  You may  %
     28 %  obtain a copy of the License at                                            %
     29 %                                                                             %
     30 %    https://imagemagick.org/script/license.php                               %
     31 %                                                                             %
     32 %  Unless required by applicable law or agreed to in writing, software        %
     33 %  distributed under the License is distributed on an "AS IS" BASIS,          %
     34 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
     35 %  See the License for the specific language governing permissions and        %
     36 %  limitations under the License.                                             %
     37 %                                                                             %
     38 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     39 */
     40 
     41 /*
     42 Include declarations.
     43 */
     44 #include "MagickCore/studio.h"
     45 #include "MagickCore/accelerate-private.h"
     46 #include "MagickCore/accelerate-kernels-private.h"
     47 #include "MagickCore/artifact.h"
     48 #include "MagickCore/cache.h"
     49 #include "MagickCore/cache-private.h"
     50 #include "MagickCore/cache-view.h"
     51 #include "MagickCore/color-private.h"
     52 #include "MagickCore/delegate-private.h"
     53 #include "MagickCore/enhance.h"
     54 #include "MagickCore/exception.h"
     55 #include "MagickCore/exception-private.h"
     56 #include "MagickCore/gem.h"
     57 #include "MagickCore/image.h"
     58 #include "MagickCore/image-private.h"
     59 #include "MagickCore/linked-list.h"
     60 #include "MagickCore/list.h"
     61 #include "MagickCore/memory_.h"
     62 #include "MagickCore/monitor-private.h"
     63 #include "MagickCore/opencl.h"
     64 #include "MagickCore/opencl-private.h"
     65 #include "MagickCore/option.h"
     66 #include "MagickCore/pixel-accessor.h"
     67 #include "MagickCore/pixel-private.h"
     68 #include "MagickCore/prepress.h"
     69 #include "MagickCore/quantize.h"
     70 #include "MagickCore/quantum-private.h"
     71 #include "MagickCore/random_.h"
     72 #include "MagickCore/random-private.h"
     73 #include "MagickCore/registry.h"
     74 #include "MagickCore/resize.h"
     75 #include "MagickCore/resize-private.h"
     76 #include "MagickCore/semaphore.h"
     77 #include "MagickCore/splay-tree.h"
     78 #include "MagickCore/statistic.h"
     79 #include "MagickCore/string_.h"
     80 #include "MagickCore/string-private.h"
     81 #include "MagickCore/token.h"
     82 
     83 #define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
     84 #define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
     85 
     86 #if defined(MAGICKCORE_OPENCL_SUPPORT)
     87 
     88 /*
     89   Define declarations.
     90 */
     91 #define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
     92 
     93 /*
     94   Static declarations.
     95 */
     96 static const ResizeWeightingFunctionType supportedResizeWeighting[] =
     97 {
     98   BoxWeightingFunction,
     99   TriangleWeightingFunction,
    100   HannWeightingFunction,
    101   HammingWeightingFunction,
    102   BlackmanWeightingFunction,
    103   CubicBCWeightingFunction,
    104   SincWeightingFunction,
    105   SincFastWeightingFunction,
    106   LastWeightingFunction
    107 };
    108 
    109 /*
    110   Helper functions.
    111 */
    112 static MagickBooleanType checkAccelerateCondition(const Image* image)
    113 {
    114   /* only direct class images are supported */
    115   if (image->storage_class != DirectClass)
    116     return(MagickFalse);
    117 
    118   /* check if the image's colorspace is supported */
    119   if (image->colorspace != RGBColorspace &&
    120       image->colorspace != sRGBColorspace &&
    121       image->colorspace != LinearGRAYColorspace &&
    122       image->colorspace != GRAYColorspace)
    123     return(MagickFalse);
    124 
    125   /* check if the virtual pixel method is compatible with the OpenCL implementation */
    126   if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
    127       (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
    128     return(MagickFalse);
    129 
    130   /* check if the image has mask */
    131   if (((image->channels & ReadMaskChannel) != 0) ||
    132       ((image->channels & WriteMaskChannel) != 0) ||
    133       ((image->channels & CompositeMaskChannel) != 0))
    134     return(MagickFalse);
    135 
    136   if (image->number_channels > 4)
    137     return(MagickFalse);
    138 
    139   /* check if pixel order is R */
    140   if (GetPixelChannelOffset(image,RedPixelChannel) != 0)
    141     return(MagickFalse);
    142 
    143   if (image->number_channels == 1)
    144     return(MagickTrue);
    145 
    146   /* check if pixel order is RA */
    147   if ((image->number_channels == 2) &&
    148       (GetPixelChannelOffset(image,AlphaPixelChannel) == 1))
    149     return(MagickTrue);
    150 
    151   if (image->number_channels == 2)
    152     return(MagickFalse);
    153 
    154   /* check if pixel order is RGB */
    155   if ((GetPixelChannelOffset(image,GreenPixelChannel) != 1) ||
    156       (GetPixelChannelOffset(image,BluePixelChannel) != 2))
    157     return(MagickFalse);
    158 
    159   if (image->number_channels == 3)
    160     return(MagickTrue);
    161 
    162   /* check if pixel order is RGBA */
    163   if (GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
    164     return(MagickFalse);
    165 
    166   return(MagickTrue);
    167 }
    168 
    169 static MagickBooleanType checkAccelerateConditionRGBA(const Image* image)
    170 {
    171   if (checkAccelerateCondition(image) == MagickFalse)
    172     return(MagickFalse);
    173 
    174   /* the order will be RGBA if the image has 4 channels */
    175   if (image->number_channels != 4)
    176     return(MagickFalse);
    177 
    178   if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
    179       (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
    180       (GetPixelBlueTraits(image) == UndefinedPixelTrait) ||
    181       (GetPixelAlphaTraits(image) == UndefinedPixelTrait))
    182     return(MagickFalse);
    183 
    184   return(MagickTrue);
    185 }
    186 
    187 static MagickBooleanType checkPixelIntensity(const Image *image,
    188   const PixelIntensityMethod method)
    189 {
    190   /* EncodePixelGamma and DecodePixelGamma are not supported */
    191   if ((method == Rec601LumaPixelIntensityMethod) ||
    192       (method == Rec709LumaPixelIntensityMethod))
    193     {
    194       if (image->colorspace == RGBColorspace)
    195         return(MagickFalse);
    196     }
    197 
    198   if ((method == Rec601LuminancePixelIntensityMethod) ||
    199       (method == Rec709LuminancePixelIntensityMethod))
    200     {
    201       if (image->colorspace == sRGBColorspace)
    202         return(MagickFalse);
    203     }
    204 
    205   return(MagickTrue);
    206 }
    207 
    208 static MagickBooleanType checkHistogramCondition(const Image *image,
    209   const PixelIntensityMethod method)
    210 {
    211   /* ensure this is the only pass get in for now. */
    212   if ((image->channel_mask & SyncChannels) == 0)
    213     return MagickFalse;
    214 
    215   return(checkPixelIntensity(image,method));
    216 }
    217 
    218 static MagickCLEnv getOpenCLEnvironment(ExceptionInfo* exception)
    219 {
    220   MagickCLEnv
    221     clEnv;
    222 
    223   clEnv=GetCurrentOpenCLEnv();
    224   if (clEnv == (MagickCLEnv) NULL)
    225     return((MagickCLEnv) NULL);
    226 
    227   if (clEnv->enabled == MagickFalse)
    228     return((MagickCLEnv) NULL);
    229 
    230   if (InitializeOpenCL(clEnv,exception) == MagickFalse)
    231     return((MagickCLEnv) NULL);
    232 
    233   return(clEnv);
    234 }
    235 
    236 static Image *cloneImage(const Image* image,ExceptionInfo *exception)
    237 {
    238   Image
    239     *clone;
    240 
    241   if (((image->channel_mask & RedChannel) != 0) &&
    242       ((image->channel_mask & GreenChannel) != 0) &&
    243       ((image->channel_mask & BlueChannel) != 0) &&
    244       ((image->channel_mask & AlphaChannel) != 0))
    245     clone=CloneImage(image,0,0,MagickTrue,exception);
    246   else
    247     {
    248       clone=CloneImage(image,0,0,MagickTrue,exception);
    249       if (clone != (Image *) NULL)
    250         SyncImagePixelCache(clone,exception);
    251     }
    252   return(clone);
    253 }
    254 
    255 /* pad the global workgroup size to the next multiple of
    256    the local workgroup size */
    257 inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
    258   const unsigned int orgGlobalSize,const unsigned int localGroupSize)
    259 {
    260   return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
    261 }
    262 
    263 static cl_mem createKernelInfo(MagickCLDevice device,const double radius,
    264   const double sigma,cl_uint *width,ExceptionInfo *exception)
    265 {
    266   char
    267     geometry[MagickPathExtent];
    268 
    269   cl_mem
    270     imageKernelBuffer;
    271 
    272   float
    273     *kernelBufferPtr;
    274 
    275   KernelInfo
    276     *kernel;
    277 
    278   ssize_t
    279     i;
    280 
    281   (void) FormatLocaleString(geometry,MagickPathExtent,
    282     "blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
    283   kernel=AcquireKernelInfo(geometry,exception);
    284   if (kernel == (KernelInfo *) NULL)
    285   {
    286     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    287       ResourceLimitWarning,"AcquireKernelInfo failed.",".");
    288     return((cl_mem) NULL);
    289   }
    290   kernelBufferPtr=(float *)AcquireMagickMemory(kernel->width*
    291     sizeof(*kernelBufferPtr));
    292   if (kernelBufferPtr == (float *) NULL)
    293     {
    294       kernel=DestroyKernelInfo(kernel);
    295       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    296         ResourceLimitWarning,"MemoryAllocationFailed.",".");
    297       return((cl_mem) NULL);
    298     }
    299   for (i = 0; i < (ssize_t) kernel->width; i++)
    300     kernelBufferPtr[i] = (float)kernel->values[i];
    301   imageKernelBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
    302     CL_MEM_READ_ONLY,kernel->width*sizeof(*kernelBufferPtr),kernelBufferPtr);
    303   *width=(cl_uint) kernel->width;
    304   kernelBufferPtr=(float *) RelinquishMagickMemory(kernelBufferPtr);
    305   kernel=DestroyKernelInfo(kernel);
    306   if (imageKernelBuffer == (cl_mem) NULL)
    307     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    308       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
    309   return(imageKernelBuffer);
    310 }
    311 
    312 static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
    313   MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,
    314   cl_mem histogramBuffer,Image *image,const ChannelType channel,
    315   ExceptionInfo *exception)
    316 {
    317   MagickBooleanType
    318     outputReady;
    319 
    320   cl_int
    321     clStatus;
    322 
    323   cl_kernel
    324     histogramKernel;
    325 
    326   cl_event
    327     event;
    328 
    329   cl_uint
    330     colorspace,
    331     method;
    332 
    333   register ssize_t
    334     i;
    335 
    336   size_t
    337     global_work_size[2];
    338 
    339   histogramKernel = NULL;
    340 
    341   outputReady = MagickFalse;
    342   colorspace = image->colorspace;
    343   method = image->intensity;
    344 
    345   /* get the OpenCL kernel */
    346   histogramKernel = AcquireOpenCLKernel(device,"Histogram");
    347   if (histogramKernel == NULL)
    348   {
    349     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
    350     goto cleanup;
    351   }
    352 
    353   /* set the kernel arguments */
    354   i = 0;
    355   clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
    356   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
    357   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&colorspace);
    358   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&method);
    359   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
    360   if (clStatus != CL_SUCCESS)
    361   {
    362     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
    363     goto cleanup;
    364   }
    365 
    366   /* launch the kernel */
    367   global_work_size[0] = image->columns;
    368   global_work_size[1] = image->rows;
    369 
    370   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
    371 
    372   if (clStatus != CL_SUCCESS)
    373   {
    374     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
    375     goto cleanup;
    376   }
    377   RecordProfileData(device,histogramKernel,event);
    378 
    379   outputReady = MagickTrue;
    380 
    381 cleanup:
    382 
    383   if (histogramKernel!=NULL)
    384     ReleaseOpenCLKernel(histogramKernel);
    385 
    386   return(outputReady);
    387 }
    388 
    389 /*
    390 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    391 %                                                                             %
    392 %                                                                             %
    393 %                                                                             %
    394 %     A c c e l e r a t e A d d N o i s e I m a g e                           %
    395 %                                                                             %
    396 %                                                                             %
    397 %                                                                             %
    398 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    399 */
    400 
    401 static Image *ComputeAddNoiseImage(const Image *image,MagickCLEnv clEnv,
    402   const NoiseType noise_type,const double attenuate,ExceptionInfo *exception)
    403 {
    404   cl_command_queue
    405     queue;
    406 
    407   cl_float
    408     cl_attenuate;
    409 
    410   cl_int
    411     status;
    412 
    413   cl_kernel
    414     addNoiseKernel;
    415 
    416   cl_mem
    417     filteredImageBuffer,
    418     imageBuffer;
    419 
    420   cl_uint
    421     bufferLength,
    422     inputPixelCount,
    423     number_channels,
    424     numRandomNumberPerPixel,
    425     pixelsPerWorkitem,
    426     seed0,
    427     seed1,
    428     workItemCount;
    429 
    430   const unsigned long
    431     *s;
    432 
    433   MagickBooleanType
    434     outputReady;
    435 
    436   MagickCLDevice
    437     device;
    438 
    439   Image
    440     *filteredImage;
    441 
    442   RandomInfo
    443     *randomInfo;
    444 
    445   size_t
    446     gsize[1],
    447     i,
    448     lsize[1],
    449     numRandPerChannel;
    450 
    451   filteredImage=NULL;
    452   imageBuffer=NULL;
    453   filteredImageBuffer=NULL;
    454   addNoiseKernel=NULL;
    455   outputReady=MagickFalse;
    456 
    457   device=RequestOpenCLDevice(clEnv);
    458   queue=AcquireOpenCLCommandQueue(device);
    459   if (queue == (cl_command_queue) NULL)
    460     goto cleanup;
    461   filteredImage=cloneImage(image,exception);
    462   if (filteredImage == (Image *) NULL)
    463     goto cleanup;
    464   if (filteredImage->number_channels != image->number_channels)
    465     goto cleanup;
    466   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
    467   if (imageBuffer == (cl_mem) NULL)
    468     goto cleanup;
    469   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
    470   if (filteredImageBuffer == (cl_mem) NULL)
    471     goto cleanup;
    472 
    473   /* find out how many random numbers needed by pixel */
    474   numRandPerChannel=0;
    475   numRandomNumberPerPixel=0;
    476   switch (noise_type)
    477   {
    478     case UniformNoise:
    479     case ImpulseNoise:
    480     case LaplacianNoise:
    481     case RandomNoise:
    482     default:
    483       numRandPerChannel=1;
    484       break;
    485     case GaussianNoise:
    486     case MultiplicativeGaussianNoise:
    487     case PoissonNoise:
    488       numRandPerChannel=2;
    489       break;
    490   };
    491   if (GetPixelRedTraits(image) != UndefinedPixelTrait)
    492     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
    493   if (GetPixelGreenTraits(image) != UndefinedPixelTrait)
    494     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
    495   if (GetPixelBlueTraits(image) != UndefinedPixelTrait)
    496     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
    497   if (GetPixelAlphaTraits(image) != UndefinedPixelTrait)
    498     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
    499 
    500   addNoiseKernel=AcquireOpenCLKernel(device,"AddNoise");
    501   if (addNoiseKernel == (cl_kernel) NULL)
    502   {
    503     (void)OpenCLThrowMagickException(device,exception,GetMagickModule(),
    504       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
    505     goto cleanup;
    506   }
    507 
    508   /* 256 work items per group, 2 groups per CU */
    509   workItemCount=device->max_compute_units*2*256;
    510   inputPixelCount=(cl_int) (image->columns*image->rows);
    511   pixelsPerWorkitem=(inputPixelCount+workItemCount-1)/workItemCount;
    512   pixelsPerWorkitem=((pixelsPerWorkitem+3)/4)*4;
    513   lsize[0]=256;
    514   gsize[0]=workItemCount;
    515 
    516   randomInfo=AcquireRandomInfo();
    517   s=GetRandomInfoSeed(randomInfo);
    518   seed0=s[0];
    519   (void) GetPseudoRandomValue(randomInfo);
    520   seed1=s[0];
    521   randomInfo=DestroyRandomInfo(randomInfo);
    522 
    523   number_channels=(cl_uint) image->number_channels;
    524   bufferLength=(cl_uint) (image->columns*image->rows*image->number_channels);
    525   cl_attenuate=(cl_float) attenuate;
    526 
    527   i=0;
    528   status =SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
    529   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
    530   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
    531   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&bufferLength);
    532   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
    533   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(NoiseType),(void *)&noise_type);
    534   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_float),(void *)&cl_attenuate);
    535   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed0);
    536   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed1);
    537   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&numRandomNumberPerPixel);
    538   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
    539   if (status != CL_SUCCESS)
    540   {
    541     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    542       ResourceLimitWarning,"clSetKernelArg failed.",".");
    543     goto cleanup;
    544   }
    545 
    546   outputReady=EnqueueOpenCLKernel(queue,addNoiseKernel,1,(const size_t *) NULL,gsize,
    547     lsize,image,filteredImage,MagickFalse,exception);
    548 
    549 cleanup:
    550 
    551   if (imageBuffer != (cl_mem) NULL)
    552     ReleaseOpenCLMemObject(imageBuffer);
    553   if (filteredImageBuffer != (cl_mem) NULL)
    554     ReleaseOpenCLMemObject(filteredImageBuffer);
    555   if (addNoiseKernel != (cl_kernel) NULL)
    556     ReleaseOpenCLKernel(addNoiseKernel);
    557   if (queue != (cl_command_queue) NULL)
    558     ReleaseOpenCLCommandQueue(device,queue);
    559   if (device != (MagickCLDevice) NULL)
    560     ReleaseOpenCLDevice(device);
    561   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
    562     filteredImage=DestroyImage(filteredImage);
    563 
    564   return(filteredImage);
    565 }
    566 
    567 MagickPrivate Image *AccelerateAddNoiseImage(const Image *image,
    568   const NoiseType noise_type,const double attenuate,ExceptionInfo *exception)
    569 {
    570   Image
    571     *filteredImage;
    572 
    573   MagickCLEnv
    574     clEnv;
    575 
    576   assert(image != NULL);
    577   assert(exception != (ExceptionInfo *) NULL);
    578 
    579   if (checkAccelerateCondition(image) == MagickFalse)
    580     return((Image *) NULL);
    581 
    582   clEnv=getOpenCLEnvironment(exception);
    583   if (clEnv == (MagickCLEnv) NULL)
    584     return((Image *) NULL);
    585 
    586   filteredImage=ComputeAddNoiseImage(image,clEnv,noise_type,attenuate,
    587     exception);
    588   return(filteredImage);
    589 }
    590 
    591 /*
    592 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    593 %                                                                             %
    594 %                                                                             %
    595 %                                                                             %
    596 %     A c c e l e r a t e B l u r I m a g e                                   %
    597 %                                                                             %
    598 %                                                                             %
    599 %                                                                             %
    600 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    601 */
    602 
    603 static Image *ComputeBlurImage(const Image* image,MagickCLEnv clEnv,
    604   const double radius,const double sigma,ExceptionInfo *exception)
    605 {
    606   cl_command_queue
    607     queue;
    608 
    609   cl_int
    610     status;
    611 
    612   cl_kernel
    613     blurColumnKernel,
    614     blurRowKernel;
    615 
    616   cl_mem
    617     filteredImageBuffer,
    618     imageBuffer,
    619     imageKernelBuffer,
    620     tempImageBuffer;
    621 
    622   cl_uint
    623     imageColumns,
    624     imageRows,
    625     kernelWidth,
    626     number_channels;
    627 
    628   Image
    629     *filteredImage;
    630 
    631   MagickBooleanType
    632     outputReady;
    633 
    634   MagickCLDevice
    635     device;
    636 
    637   MagickSizeType
    638     length;
    639 
    640   size_t
    641     chunkSize=256,
    642     gsize[2],
    643     i,
    644     lsize[2];
    645 
    646   filteredImage=NULL;
    647   imageBuffer=NULL;
    648   filteredImageBuffer=NULL;
    649   tempImageBuffer=NULL;
    650   imageKernelBuffer=NULL;
    651   blurRowKernel=NULL;
    652   blurColumnKernel=NULL;
    653   outputReady=MagickFalse;
    654 
    655   device=RequestOpenCLDevice(clEnv);
    656   queue=AcquireOpenCLCommandQueue(device);
    657   filteredImage=cloneImage(image,exception);
    658   if (filteredImage == (Image *) NULL)
    659     goto cleanup;
    660   if (filteredImage->number_channels != image->number_channels)
    661     goto cleanup;
    662   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
    663   if (imageBuffer == (cl_mem) NULL)
    664     goto cleanup;
    665   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
    666   if (filteredImageBuffer == (cl_mem) NULL)
    667     goto cleanup;
    668 
    669   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
    670     exception);
    671   if (imageKernelBuffer == (cl_mem) NULL)
    672     goto cleanup;
    673 
    674   length=image->columns*image->rows;
    675   tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
    676     sizeof(cl_float4),(void *) NULL);
    677   if (tempImageBuffer == (cl_mem) NULL)
    678     goto cleanup;
    679 
    680   blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
    681   if (blurRowKernel == (cl_kernel) NULL)
    682   {
    683     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    684       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
    685     goto cleanup;
    686   }
    687 
    688   number_channels=(cl_uint) image->number_channels;
    689   imageColumns=(cl_uint) image->columns;
    690   imageRows=(cl_uint) image->rows;
    691 
    692   i=0;
    693   status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
    694   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
    695   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
    696   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
    697   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
    698   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
    699   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
    700   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
    701   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
    702   if (status != CL_SUCCESS)
    703   {
    704     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    705       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
    706     goto cleanup;
    707   }
    708 
    709   gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
    710   gsize[1]=image->rows;
    711   lsize[0]=chunkSize;
    712   lsize[1]=1;
    713 
    714   outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,(size_t *) NULL,gsize,
    715     lsize,image,filteredImage,MagickFalse,exception);
    716   if (outputReady == MagickFalse)
    717     goto cleanup;
    718 
    719   blurColumnKernel=AcquireOpenCLKernel(device,"BlurColumn");
    720   if (blurColumnKernel == (cl_kernel) NULL)
    721   {
    722     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    723       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
    724     goto cleanup;
    725   }
    726 
    727   i=0;
    728   status =SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
    729   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),&number_channels);
    730   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
    731   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
    732   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
    733   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
    734   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
    735   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
    736   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
    737   if (status != CL_SUCCESS)
    738   {
    739     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    740       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
    741     goto cleanup;
    742   }
    743 
    744   gsize[0]=image->columns;
    745   gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
    746   lsize[0]=1;
    747   lsize[1]=chunkSize;
    748 
    749   outputReady=EnqueueOpenCLKernel(queue,blurColumnKernel,2,(size_t *) NULL,gsize,
    750     lsize,image,filteredImage,MagickFalse,exception);
    751 
    752 cleanup:
    753 
    754   if (imageBuffer != (cl_mem) NULL)
    755     ReleaseOpenCLMemObject(imageBuffer);
    756   if (filteredImageBuffer != (cl_mem) NULL)
    757     ReleaseOpenCLMemObject(filteredImageBuffer);
    758   if (tempImageBuffer != (cl_mem) NULL)
    759     ReleaseOpenCLMemObject(tempImageBuffer);
    760   if (imageKernelBuffer != (cl_mem) NULL)
    761     ReleaseOpenCLMemObject(imageKernelBuffer);
    762   if (blurRowKernel != (cl_kernel) NULL)
    763     ReleaseOpenCLKernel(blurRowKernel);
    764   if (blurColumnKernel != (cl_kernel) NULL)
    765     ReleaseOpenCLKernel(blurColumnKernel);
    766   if (queue != (cl_command_queue) NULL)
    767     ReleaseOpenCLCommandQueue(device,queue);
    768   if (device != (MagickCLDevice) NULL)
    769     ReleaseOpenCLDevice(device);
    770   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
    771     filteredImage=DestroyImage(filteredImage);
    772 
    773   return(filteredImage);
    774 }
    775 
    776 MagickPrivate Image* AccelerateBlurImage(const Image *image,
    777   const double radius,const double sigma,ExceptionInfo *exception)
    778 {
    779   Image
    780     *filteredImage;
    781 
    782   MagickCLEnv
    783     clEnv;
    784 
    785   assert(image != NULL);
    786   assert(exception != (ExceptionInfo *) NULL);
    787 
    788   if (checkAccelerateCondition(image) == MagickFalse)
    789     return((Image *) NULL);
    790 
    791   clEnv=getOpenCLEnvironment(exception);
    792   if (clEnv == (MagickCLEnv) NULL)
    793     return((Image *) NULL);
    794 
    795   filteredImage=ComputeBlurImage(image,clEnv,radius,sigma,exception);
    796   return(filteredImage);
    797 }
    798 
    799 /*
    800 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    801 %                                                                             %
    802 %                                                                             %
    803 %                                                                             %
    804 %     A c c e l e r a t e C o n t r a s t I m a g e                           %
    805 %                                                                             %
    806 %                                                                             %
    807 %                                                                             %
    808 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    809 */
    810 
    811 static MagickBooleanType ComputeContrastImage(Image *image,MagickCLEnv clEnv,
    812   const MagickBooleanType sharpen,ExceptionInfo *exception)
    813 {
    814   cl_command_queue
    815     queue;
    816 
    817   cl_int
    818     status,
    819     sign;
    820 
    821   cl_kernel
    822     contrastKernel;
    823 
    824   cl_mem
    825     imageBuffer;
    826 
    827   cl_uint
    828     number_channels;
    829 
    830   MagickBooleanType
    831     outputReady;
    832 
    833   MagickCLDevice
    834     device;
    835 
    836   size_t
    837     gsize[2],
    838     i;
    839 
    840   contrastKernel=NULL;
    841   imageBuffer=NULL;
    842   outputReady=MagickFalse;
    843 
    844   device=RequestOpenCLDevice(clEnv);
    845   queue=AcquireOpenCLCommandQueue(device);
    846   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
    847   if (imageBuffer == (cl_mem) NULL)
    848     goto cleanup;
    849 
    850   contrastKernel=AcquireOpenCLKernel(device,"Contrast");
    851   if (contrastKernel == (cl_kernel) NULL)
    852   {
    853     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    854       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
    855     goto cleanup;
    856   }
    857 
    858   number_channels=(cl_uint) image->number_channels;
    859   sign=sharpen != MagickFalse ? 1 : -1;
    860 
    861   i=0;
    862   status =SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
    863   status|=SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_uint),&number_channels);
    864   status|=SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_int),&sign);
    865   if (status != CL_SUCCESS)
    866   {
    867     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
    868       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
    869     goto cleanup;
    870   }
    871 
    872   gsize[0]=image->columns;
    873   gsize[1]=image->rows;
    874 
    875   outputReady=EnqueueOpenCLKernel(queue,contrastKernel,2,(const size_t *) NULL,
    876     gsize,(const size_t *) NULL,image,(Image *) NULL,MagickFalse,exception);
    877 
    878 cleanup:
    879 
    880   if (imageBuffer != (cl_mem) NULL)
    881     ReleaseOpenCLMemObject(imageBuffer);
    882   if (contrastKernel != (cl_kernel) NULL)
    883     ReleaseOpenCLKernel(contrastKernel);
    884   if (queue != (cl_command_queue) NULL)
    885     ReleaseOpenCLCommandQueue(device,queue);
    886   if (device != (MagickCLDevice) NULL)
    887     ReleaseOpenCLDevice(device);
    888 
    889   return(outputReady);
    890 }
    891 
    892 MagickPrivate MagickBooleanType AccelerateContrastImage(Image *image,
    893   const MagickBooleanType sharpen,ExceptionInfo *exception)
    894 {
    895   MagickBooleanType
    896     status;
    897 
    898   MagickCLEnv
    899     clEnv;
    900 
    901   assert(image != NULL);
    902   assert(exception != (ExceptionInfo *) NULL);
    903 
    904   if (checkAccelerateCondition(image) == MagickFalse)
    905     return(MagickFalse);
    906 
    907   clEnv=getOpenCLEnvironment(exception);
    908   if (clEnv == (MagickCLEnv) NULL)
    909     return(MagickFalse);
    910 
    911   status=ComputeContrastImage(image,clEnv,sharpen,exception);
    912   return(status);
    913 }
    914 
    915 /*
    916 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    917 %                                                                             %
    918 %                                                                             %
    919 %                                                                             %
    920 %     A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e             %
    921 %                                                                             %
    922 %                                                                             %
    923 %                                                                             %
    924 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    925 */
    926 
    927 static MagickBooleanType ComputeContrastStretchImage(Image *image,
    928   MagickCLEnv clEnv,const double black_point,const double white_point,
    929   ExceptionInfo *exception)
    930 {
    931 #define ContrastStretchImageTag  "ContrastStretch/Image"
    932 #define MaxRange(color)  ((cl_float) ScaleQuantumToMap((Quantum) (color)))
    933 
    934   CacheView
    935     *image_view;
    936 
    937   cl_command_queue
    938     queue;
    939 
    940   cl_int
    941     clStatus;
    942 
    943   cl_mem_flags
    944     mem_flags;
    945 
    946   cl_mem
    947     histogramBuffer,
    948     imageBuffer,
    949     stretchMapBuffer;
    950 
    951   cl_kernel
    952     histogramKernel,
    953     stretchKernel;
    954 
    955   cl_event
    956     event;
    957 
    958   cl_uint4
    959     *histogram;
    960 
    961   double
    962     intensity;
    963 
    964   cl_float4
    965     black,
    966     white;
    967 
    968   MagickBooleanType
    969     outputReady,
    970     status;
    971 
    972   MagickCLDevice
    973     device;
    974 
    975   MagickSizeType
    976     length;
    977 
    978   PixelPacket
    979     *stretch_map;
    980 
    981   register ssize_t
    982     i;
    983 
    984   size_t
    985     global_work_size[2];
    986 
    987   void
    988     *hostPtr,
    989     *inputPixels;
    990 
    991   histogram=NULL;
    992   stretch_map=NULL;
    993   inputPixels = NULL;
    994   imageBuffer = NULL;
    995   histogramBuffer = NULL;
    996   stretchMapBuffer = NULL;
    997   histogramKernel = NULL;
    998   stretchKernel = NULL;
    999   queue = NULL;
   1000   outputReady = MagickFalse;
   1001 
   1002 
   1003   assert(image != (Image *) NULL);
   1004   assert(image->signature == MagickCoreSignature);
   1005   if (image->debug != MagickFalse)
   1006     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
   1007 
   1008   /* exception=(&image->exception); */
   1009 
   1010   /*
   1011     Initialize opencl environment.
   1012   */
   1013   device = RequestOpenCLDevice(clEnv);
   1014   queue = AcquireOpenCLCommandQueue(device);
   1015 
   1016   /*
   1017     Allocate and initialize histogram arrays.
   1018   */
   1019   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
   1020 
   1021   if (histogram == (cl_uint4 *) NULL)
   1022     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
   1023 
   1024   /* reset histogram */
   1025   (void) memset(histogram,0,(MaxMap+1)*sizeof(*histogram));
   1026 
   1027   /*
   1028   if (IsGrayImage(image,exception) != MagickFalse)
   1029     (void) SetImageColorspace(image,GRAYColorspace);
   1030   */
   1031 
   1032   status=MagickTrue;
   1033 
   1034 
   1035   /*
   1036     Form histogram.
   1037   */
   1038   /* Create and initialize OpenCL buffers. */
   1039   /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
   1040   /* assume this  will get a writable image */
   1041   image_view=AcquireAuthenticCacheView(image,exception);
   1042   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   1043 
   1044   if (inputPixels == (void *) NULL)
   1045   {
   1046     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
   1047     goto cleanup;
   1048   }
   1049   /* If the host pointer is aligned to the size of CLPixelPacket,
   1050      then use the host buffer directly from the GPU; otherwise,
   1051      create a buffer on the GPU and copy the data over */
   1052   if (ALIGNED(inputPixels,CLPixelPacket))
   1053   {
   1054     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   1055   }
   1056   else
   1057   {
   1058     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   1059   }
   1060   /* create a CL buffer from image pixel buffer */
   1061   length = image->columns * image->rows;
   1062   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   1063   if (clStatus != CL_SUCCESS)
   1064   {
   1065     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1066     goto cleanup;
   1067   }
   1068 
   1069   /* If the host pointer is aligned to the size of cl_uint,
   1070      then use the host buffer directly from the GPU; otherwise,
   1071      create a buffer on the GPU and copy the data over */
   1072   if (ALIGNED(histogram,cl_uint4))
   1073   {
   1074     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
   1075     hostPtr = histogram;
   1076   }
   1077   else
   1078   {
   1079     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   1080     hostPtr = histogram;
   1081   }
   1082   /* create a CL buffer for histogram  */
   1083   length = (MaxMap+1);
   1084   histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
   1085   if (clStatus != CL_SUCCESS)
   1086   {
   1087     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1088     goto cleanup;
   1089   }
   1090 
   1091   status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask,exception);
   1092   if (status == MagickFalse)
   1093     goto cleanup;
   1094 
   1095   /* read from the kenel output */
   1096   if (ALIGNED(histogram,cl_uint4))
   1097   {
   1098     length = (MaxMap+1);
   1099     clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
   1100   }
   1101   else
   1102   {
   1103     length = (MaxMap+1);
   1104     clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
   1105   }
   1106   if (clStatus != CL_SUCCESS)
   1107   {
   1108     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   1109     goto cleanup;
   1110   }
   1111 
   1112   /* unmap, don't block gpu to use this buffer again.  */
   1113   if (ALIGNED(histogram,cl_uint4))
   1114   {
   1115     clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
   1116     if (clStatus != CL_SUCCESS)
   1117     {
   1118       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
   1119       goto cleanup;
   1120     }
   1121   }
   1122 
   1123   /* recreate input buffer later, in case image updated */
   1124 #ifdef RECREATEBUFFER
   1125   if (imageBuffer!=NULL)
   1126     clEnv->library->clReleaseMemObject(imageBuffer);
   1127 #endif
   1128 
   1129   /* CPU stuff */
   1130   /*
   1131      Find the histogram boundaries by locating the black/white levels.
   1132   */
   1133   black.x=0.0;
   1134   white.x=MaxRange(QuantumRange);
   1135   if ((image->channel_mask & RedChannel) != 0)
   1136   {
   1137     intensity=0.0;
   1138     for (i=0; i <= (ssize_t) MaxMap; i++)
   1139     {
   1140       intensity+=histogram[i].s[2];
   1141       if (intensity > black_point)
   1142         break;
   1143     }
   1144     black.x=(cl_float) i;
   1145     intensity=0.0;
   1146     for (i=(ssize_t) MaxMap; i != 0; i--)
   1147     {
   1148       intensity+=histogram[i].s[2];
   1149       if (intensity > ((double) image->columns*image->rows-white_point))
   1150         break;
   1151     }
   1152     white.x=(cl_float) i;
   1153   }
   1154   black.y=0.0;
   1155   white.y=MaxRange(QuantumRange);
   1156   if ((image->channel_mask & GreenChannel) != 0)
   1157   {
   1158     intensity=0.0;
   1159     for (i=0; i <= (ssize_t) MaxMap; i++)
   1160     {
   1161       intensity+=histogram[i].s[2];
   1162       if (intensity > black_point)
   1163         break;
   1164     }
   1165     black.y=(cl_float) i;
   1166     intensity=0.0;
   1167     for (i=(ssize_t) MaxMap; i != 0; i--)
   1168     {
   1169       intensity+=histogram[i].s[2];
   1170       if (intensity > ((double) image->columns*image->rows-white_point))
   1171         break;
   1172     }
   1173     white.y=(cl_float) i;
   1174   }
   1175   black.z=0.0;
   1176   white.z=MaxRange(QuantumRange);
   1177   if ((image->channel_mask & BlueChannel) != 0)
   1178   {
   1179     intensity=0.0;
   1180     for (i=0; i <= (ssize_t) MaxMap; i++)
   1181     {
   1182       intensity+=histogram[i].s[2];
   1183       if (intensity > black_point)
   1184         break;
   1185     }
   1186     black.z=(cl_float) i;
   1187     intensity=0.0;
   1188     for (i=(ssize_t) MaxMap; i != 0; i--)
   1189     {
   1190       intensity+=histogram[i].s[2];
   1191       if (intensity > ((double) image->columns*image->rows-white_point))
   1192         break;
   1193     }
   1194     white.z=(cl_float) i;
   1195   }
   1196   black.w=0.0;
   1197   white.w=MaxRange(QuantumRange);
   1198   if ((image->channel_mask & AlphaChannel) != 0)
   1199   {
   1200     intensity=0.0;
   1201     for (i=0; i <= (ssize_t) MaxMap; i++)
   1202     {
   1203       intensity+=histogram[i].s[2];
   1204       if (intensity > black_point)
   1205         break;
   1206     }
   1207     black.w=(cl_float) i;
   1208     intensity=0.0;
   1209     for (i=(ssize_t) MaxMap; i != 0; i--)
   1210     {
   1211       intensity+=histogram[i].s[2];
   1212       if (intensity > ((double) image->columns*image->rows-white_point))
   1213         break;
   1214     }
   1215     white.w=(cl_float) i;
   1216   }
   1217 
   1218   stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
   1219     sizeof(*stretch_map));
   1220 
   1221   if (stretch_map == (PixelPacket *) NULL)
   1222     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
   1223       image->filename);
   1224 
   1225   /*
   1226     Stretch the histogram to create the stretched image mapping.
   1227   */
   1228   (void) memset(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
   1229   for (i=0; i <= (ssize_t) MaxMap; i++)
   1230   {
   1231     if ((image->channel_mask & RedChannel) != 0)
   1232     {
   1233       if (i < (ssize_t) black.x)
   1234         stretch_map[i].red=(Quantum) 0;
   1235       else
   1236         if (i > (ssize_t) white.x)
   1237           stretch_map[i].red=QuantumRange;
   1238         else
   1239           if (black.x != white.x)
   1240             stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
   1241                   (i-black.x)/(white.x-black.x)));
   1242     }
   1243     if ((image->channel_mask & GreenChannel) != 0)
   1244     {
   1245       if (i < (ssize_t) black.y)
   1246         stretch_map[i].green=0;
   1247       else
   1248         if (i > (ssize_t) white.y)
   1249           stretch_map[i].green=QuantumRange;
   1250         else
   1251           if (black.y != white.y)
   1252             stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
   1253                   (i-black.y)/(white.y-black.y)));
   1254     }
   1255     if ((image->channel_mask & BlueChannel) != 0)
   1256     {
   1257       if (i < (ssize_t) black.z)
   1258         stretch_map[i].blue=0;
   1259       else
   1260         if (i > (ssize_t) white.z)
   1261           stretch_map[i].blue= QuantumRange;
   1262         else
   1263           if (black.z != white.z)
   1264             stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
   1265                   (i-black.z)/(white.z-black.z)));
   1266     }
   1267     if ((image->channel_mask & AlphaChannel) != 0)
   1268     {
   1269       if (i < (ssize_t) black.w)
   1270         stretch_map[i].alpha=0;
   1271       else
   1272         if (i > (ssize_t) white.w)
   1273           stretch_map[i].alpha=QuantumRange;
   1274         else
   1275           if (black.w != white.w)
   1276             stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
   1277                   (i-black.w)/(white.w-black.w)));
   1278     }
   1279   }
   1280 
   1281   /*
   1282     Stretch the image.
   1283   */
   1284   if (((image->channel_mask & AlphaChannel) != 0) || (((image->channel_mask & IndexChannel) != 0) &&
   1285       (image->colorspace == CMYKColorspace)))
   1286     image->storage_class=DirectClass;
   1287   if (image->storage_class == PseudoClass)
   1288   {
   1289     /*
   1290        Stretch colormap.
   1291        */
   1292     for (i=0; i < (ssize_t) image->colors; i++)
   1293     {
   1294       if ((image->channel_mask & RedChannel) != 0)
   1295       {
   1296         if (black.x != white.x)
   1297           image->colormap[i].red=stretch_map[
   1298             ScaleQuantumToMap(image->colormap[i].red)].red;
   1299       }
   1300       if ((image->channel_mask & GreenChannel) != 0)
   1301       {
   1302         if (black.y != white.y)
   1303           image->colormap[i].green=stretch_map[
   1304             ScaleQuantumToMap(image->colormap[i].green)].green;
   1305       }
   1306       if ((image->channel_mask & BlueChannel) != 0)
   1307       {
   1308         if (black.z != white.z)
   1309           image->colormap[i].blue=stretch_map[
   1310             ScaleQuantumToMap(image->colormap[i].blue)].blue;
   1311       }
   1312       if ((image->channel_mask & AlphaChannel) != 0)
   1313       {
   1314         if (black.w != white.w)
   1315           image->colormap[i].alpha=stretch_map[
   1316             ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
   1317       }
   1318     }
   1319   }
   1320 
   1321   /*
   1322     Stretch image.
   1323   */
   1324 
   1325 
   1326   /* GPU can work on this again, image and equalize map as input
   1327     image:        uchar4 (CLPixelPacket)
   1328     stretch_map:  uchar4 (PixelPacket)
   1329     black, white: float4 (FloatPixelPacket) */
   1330 
   1331 #ifdef RECREATEBUFFER
   1332   /* If the host pointer is aligned to the size of CLPixelPacket,
   1333      then use the host buffer directly from the GPU; otherwise,
   1334      create a buffer on the GPU and copy the data over */
   1335   if (ALIGNED(inputPixels,CLPixelPacket))
   1336   {
   1337     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
   1338   }
   1339   else
   1340   {
   1341     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   1342   }
   1343   /* create a CL buffer from image pixel buffer */
   1344   length = image->columns * image->rows;
   1345   imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   1346   if (clStatus != CL_SUCCESS)
   1347   {
   1348     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1349     goto cleanup;
   1350   }
   1351 #endif
   1352 
   1353   /* Create and initialize OpenCL buffers. */
   1354   if (ALIGNED(stretch_map, PixelPacket))
   1355   {
   1356     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   1357     hostPtr = stretch_map;
   1358   }
   1359   else
   1360   {
   1361     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   1362     hostPtr = stretch_map;
   1363   }
   1364   /* create a CL buffer for stretch_map  */
   1365   length = (MaxMap+1);
   1366   stretchMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
   1367   if (clStatus != CL_SUCCESS)
   1368   {
   1369     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1370     goto cleanup;
   1371   }
   1372 
   1373   /* get the OpenCL kernel */
   1374   stretchKernel = AcquireOpenCLKernel(device,"ContrastStretch");
   1375   if (stretchKernel == NULL)
   1376   {
   1377     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   1378     goto cleanup;
   1379   }
   1380 
   1381   /* set the kernel arguments */
   1382   i = 0;
   1383   clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   1384   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&image->channel_mask);
   1385   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
   1386   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&white);
   1387   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&black);
   1388   if (clStatus != CL_SUCCESS)
   1389   {
   1390     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   1391     goto cleanup;
   1392   }
   1393 
   1394   /* launch the kernel */
   1395   global_work_size[0] = image->columns;
   1396   global_work_size[1] = image->rows;
   1397 
   1398   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   1399 
   1400   if (clStatus != CL_SUCCESS)
   1401   {
   1402     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   1403     goto cleanup;
   1404   }
   1405   RecordProfileData(device,stretchKernel,event);
   1406 
   1407   /* read the data back */
   1408   if (ALIGNED(inputPixels,CLPixelPacket))
   1409   {
   1410     length = image->columns * image->rows;
   1411     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   1412   }
   1413   else
   1414   {
   1415     length = image->columns * image->rows;
   1416     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
   1417   }
   1418   if (clStatus != CL_SUCCESS)
   1419   {
   1420     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   1421     goto cleanup;
   1422   }
   1423 
   1424   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
   1425 
   1426 cleanup:
   1427 
   1428   image_view=DestroyCacheView(image_view);
   1429 
   1430   if (imageBuffer!=NULL)
   1431     clEnv->library->clReleaseMemObject(imageBuffer);
   1432 
   1433   if (stretchMapBuffer!=NULL)
   1434     clEnv->library->clReleaseMemObject(stretchMapBuffer);
   1435   if (stretch_map!=NULL)
   1436     stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
   1437   if (histogramBuffer!=NULL)
   1438     clEnv->library->clReleaseMemObject(histogramBuffer);
   1439   if (histogram!=NULL)
   1440     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
   1441   if (histogramKernel!=NULL)
   1442     ReleaseOpenCLKernel(histogramKernel);
   1443   if (stretchKernel!=NULL)
   1444     ReleaseOpenCLKernel(stretchKernel);
   1445   if (queue != NULL)
   1446     ReleaseOpenCLCommandQueue(device,queue);
   1447   if (device != NULL)
   1448     ReleaseOpenCLDevice(device);
   1449 
   1450   return(outputReady);
   1451 }
   1452 
   1453 MagickPrivate MagickBooleanType AccelerateContrastStretchImage(
   1454   Image *image,const double black_point,const double white_point,
   1455   ExceptionInfo *exception)
   1456 {
   1457   MagickBooleanType
   1458     status;
   1459 
   1460   MagickCLEnv
   1461     clEnv;
   1462 
   1463   assert(image != NULL);
   1464   assert(exception != (ExceptionInfo *) NULL);
   1465 
   1466   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
   1467       (checkHistogramCondition(image,image->intensity) == MagickFalse))
   1468     return(MagickFalse);
   1469 
   1470   clEnv=getOpenCLEnvironment(exception);
   1471   if (clEnv == (MagickCLEnv) NULL)
   1472     return(MagickFalse);
   1473 
   1474   status=ComputeContrastStretchImage(image,clEnv,black_point,white_point,
   1475     exception);
   1476   return(status);
   1477 }
   1478 
   1479 /*
   1480 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   1481 %                                                                             %
   1482 %                                                                             %
   1483 %                                                                             %
   1484 %     A c c e l e r a t e C o n v o l v e I m a g e                           %
   1485 %                                                                             %
   1486 %                                                                             %
   1487 %                                                                             %
   1488 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   1489 */
   1490 
   1491 static Image *ComputeConvolveImage(const Image* image,MagickCLEnv clEnv,
   1492   const KernelInfo *kernel,ExceptionInfo *exception)
   1493 {
   1494   CacheView
   1495     *filteredImage_view,
   1496     *image_view;
   1497 
   1498   cl_command_queue
   1499     queue;
   1500 
   1501   cl_event
   1502     event;
   1503 
   1504   cl_kernel
   1505     clkernel;
   1506 
   1507   cl_int
   1508     clStatus;
   1509 
   1510   cl_mem
   1511     convolutionKernel,
   1512     filteredImageBuffer,
   1513     imageBuffer;
   1514 
   1515   cl_mem_flags
   1516     mem_flags;
   1517 
   1518   const void
   1519     *inputPixels;
   1520 
   1521   float
   1522     *kernelBufferPtr;
   1523 
   1524   Image
   1525     *filteredImage;
   1526 
   1527   MagickBooleanType
   1528     outputReady;
   1529 
   1530   MagickCLDevice
   1531     device;
   1532 
   1533   MagickSizeType
   1534     length;
   1535 
   1536   size_t
   1537     global_work_size[3],
   1538     localGroupSize[3],
   1539     localMemoryRequirement;
   1540 
   1541   unsigned
   1542     kernelSize;
   1543 
   1544   unsigned int
   1545     filterHeight,
   1546     filterWidth,
   1547     i,
   1548     imageHeight,
   1549     imageWidth,
   1550     matte;
   1551 
   1552   void
   1553     *filteredPixels,
   1554     *hostPtr;
   1555 
   1556   /* intialize all CL objects to NULL */
   1557   imageBuffer = NULL;
   1558   filteredImageBuffer = NULL;
   1559   convolutionKernel = NULL;
   1560   clkernel = NULL;
   1561   queue = NULL;
   1562 
   1563   filteredImage = NULL;
   1564   filteredImage_view = NULL;
   1565   outputReady = MagickFalse;
   1566 
   1567   device = RequestOpenCLDevice(clEnv);
   1568 
   1569   image_view=AcquireAuthenticCacheView(image,exception);
   1570   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   1571   if (inputPixels == (const void *) NULL)
   1572   {
   1573     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
   1574     goto cleanup;
   1575   }
   1576 
   1577   /* Create and initialize OpenCL buffers. */
   1578 
   1579   /* If the host pointer is aligned to the size of CLPixelPacket,
   1580      then use the host buffer directly from the GPU; otherwise,
   1581      create a buffer on the GPU and copy the data over */
   1582   if (ALIGNED(inputPixels,CLPixelPacket))
   1583   {
   1584     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   1585   }
   1586   else
   1587   {
   1588     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   1589   }
   1590   /* create a CL buffer from image pixel buffer */
   1591   length = image->columns * image->rows;
   1592   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   1593   if (clStatus != CL_SUCCESS)
   1594   {
   1595     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1596     goto cleanup;
   1597   }
   1598 
   1599   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
   1600   assert(filteredImage != NULL);
   1601   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
   1602   {
   1603     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
   1604     goto cleanup;
   1605   }
   1606   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
   1607   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
   1608   if (filteredPixels == (void *) NULL)
   1609   {
   1610     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
   1611     goto cleanup;
   1612   }
   1613 
   1614   if (ALIGNED(filteredPixels,CLPixelPacket))
   1615   {
   1616     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
   1617     hostPtr = filteredPixels;
   1618   }
   1619   else
   1620   {
   1621     mem_flags = CL_MEM_WRITE_ONLY;
   1622     hostPtr = NULL;
   1623   }
   1624   /* create a CL buffer from image pixel buffer */
   1625   length = image->columns * image->rows;
   1626   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   1627   if (clStatus != CL_SUCCESS)
   1628   {
   1629     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1630     goto cleanup;
   1631   }
   1632 
   1633   kernelSize = (unsigned int) (kernel->width * kernel->height);
   1634   convolutionKernel = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
   1635   if (clStatus != CL_SUCCESS)
   1636   {
   1637     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1638     goto cleanup;
   1639   }
   1640 
   1641   queue = AcquireOpenCLCommandQueue(device);
   1642 
   1643   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
   1644           , 0, NULL, NULL, &clStatus);
   1645   if (clStatus != CL_SUCCESS)
   1646   {
   1647     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
   1648     goto cleanup;
   1649   }
   1650   for (i = 0; i < kernelSize; i++)
   1651   {
   1652     kernelBufferPtr[i] = (float) kernel->values[i];
   1653   }
   1654   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
   1655   if (clStatus != CL_SUCCESS)
   1656   {
   1657     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
   1658     goto cleanup;
   1659   }
   1660 
   1661   /* Compute the local memory requirement for a 16x16 workgroup.
   1662      If it's larger than 16k, reduce the workgroup size to 8x8 */
   1663   localGroupSize[0] = 16;
   1664   localGroupSize[1] = 16;
   1665   localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
   1666     + kernel->width*kernel->height*sizeof(float);
   1667 
   1668   if (localMemoryRequirement > device->local_memory_size)
   1669   {
   1670     localGroupSize[0] = 8;
   1671     localGroupSize[1] = 8;
   1672     localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
   1673       + kernel->width*kernel->height*sizeof(float);
   1674   }
   1675   if (localMemoryRequirement <= device->local_memory_size)
   1676   {
   1677     /* get the OpenCL kernel */
   1678     clkernel = AcquireOpenCLKernel(device,"ConvolveOptimized");
   1679     if (clkernel == NULL)
   1680     {
   1681       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   1682       goto cleanup;
   1683     }
   1684 
   1685     /* set the kernel arguments */
   1686     i = 0;
   1687     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   1688     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   1689     imageWidth = (unsigned int) image->columns;
   1690     imageHeight = (unsigned int) image->rows;
   1691     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
   1692     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
   1693     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
   1694     filterWidth = (unsigned int) kernel->width;
   1695     filterHeight = (unsigned int) kernel->height;
   1696     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
   1697     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
   1698     matte = (image->alpha_trait > CopyPixelTrait)?1:0;
   1699     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
   1700     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
   1701     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
   1702     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
   1703     if (clStatus != CL_SUCCESS)
   1704     {
   1705       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   1706       goto cleanup;
   1707     }
   1708 
   1709     /* pad the global size to a multiple of the local work size dimension */
   1710     global_work_size[0] = ((image->columns + localGroupSize[0]  - 1)/localGroupSize[0] ) * localGroupSize[0] ;
   1711     global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
   1712 
   1713     /* launch the kernel */
   1714     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
   1715     if (clStatus != CL_SUCCESS)
   1716     {
   1717       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   1718       goto cleanup;
   1719     }
   1720     RecordProfileData(device,clkernel,event);
   1721   }
   1722   else
   1723   {
   1724     /* get the OpenCL kernel */
   1725     clkernel = AcquireOpenCLKernel(device,"Convolve");
   1726     if (clkernel == NULL)
   1727     {
   1728       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   1729       goto cleanup;
   1730     }
   1731 
   1732     /* set the kernel arguments */
   1733     i = 0;
   1734     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   1735     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   1736     imageWidth = (unsigned int) image->columns;
   1737     imageHeight = (unsigned int) image->rows;
   1738     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
   1739     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
   1740     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
   1741     filterWidth = (unsigned int) kernel->width;
   1742     filterHeight = (unsigned int) kernel->height;
   1743     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
   1744     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
   1745     matte = (image->alpha_trait > CopyPixelTrait)?1:0;
   1746     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
   1747     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
   1748     if (clStatus != CL_SUCCESS)
   1749     {
   1750       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   1751       goto cleanup;
   1752     }
   1753 
   1754     localGroupSize[0] = 8;
   1755     localGroupSize[1] = 8;
   1756     global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
   1757     global_work_size[1] = (image->rows    + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
   1758 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
   1759 
   1760     if (clStatus != CL_SUCCESS)
   1761     {
   1762       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   1763       goto cleanup;
   1764     }
   1765   }
   1766   RecordProfileData(device,clkernel,event);
   1767 
   1768   if (ALIGNED(filteredPixels,CLPixelPacket))
   1769   {
   1770     length = image->columns * image->rows;
   1771     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   1772   }
   1773   else
   1774   {
   1775     length = image->columns * image->rows;
   1776     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   1777   }
   1778   if (clStatus != CL_SUCCESS)
   1779   {
   1780     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   1781     goto cleanup;
   1782   }
   1783 
   1784   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
   1785 
   1786 cleanup:
   1787 
   1788   image_view=DestroyCacheView(image_view);
   1789   if (filteredImage_view != NULL)
   1790     filteredImage_view=DestroyCacheView(filteredImage_view);
   1791   if (imageBuffer != NULL)
   1792     clEnv->library->clReleaseMemObject(imageBuffer);
   1793   if (filteredImageBuffer != NULL)
   1794     clEnv->library->clReleaseMemObject(filteredImageBuffer);
   1795   if (convolutionKernel != NULL)
   1796     clEnv->library->clReleaseMemObject(convolutionKernel);
   1797   if (clkernel != NULL)
   1798     ReleaseOpenCLKernel(clkernel);
   1799   if (queue != NULL)
   1800     ReleaseOpenCLCommandQueue(device,queue);
   1801   if (device != NULL)
   1802     ReleaseOpenCLDevice(device);
   1803   if (outputReady == MagickFalse)
   1804   {
   1805     if (filteredImage != NULL)
   1806     {
   1807       DestroyImage(filteredImage);
   1808       filteredImage = NULL;
   1809     }
   1810   }
   1811 
   1812   return(filteredImage);
   1813 }
   1814 
   1815 MagickPrivate Image *AccelerateConvolveImage(const Image *image,
   1816   const KernelInfo *kernel,ExceptionInfo *exception)
   1817 {
   1818   /* Temporary disabled due to access violation
   1819 
   1820   Image
   1821     *filteredImage;
   1822 
   1823   assert(image != NULL);
   1824   assert(kernel != (KernelInfo *) NULL);
   1825   assert(exception != (ExceptionInfo *) NULL);
   1826   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
   1827       (checkOpenCLEnvironment(exception) == MagickFalse))
   1828     return((Image *) NULL);
   1829 
   1830   filteredImage=ComputeConvolveImage(image,kernel,exception);
   1831   return(filteredImage);
   1832   */
   1833   magick_unreferenced(image);
   1834   magick_unreferenced(kernel);
   1835   magick_unreferenced(exception);
   1836   return((Image *)NULL);
   1837 }
   1838 
   1839 /*
   1840 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   1841 %                                                                             %
   1842 %                                                                             %
   1843 %                                                                             %
   1844 %     A c c e l e r a t e D e s p e c k l e I m a g e                         %
   1845 %                                                                             %
   1846 %                                                                             %
   1847 %                                                                             %
   1848 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   1849 */
   1850 
   1851 static Image *ComputeDespeckleImage(const Image *image,MagickCLEnv clEnv,
   1852   ExceptionInfo*exception)
   1853 {
   1854   static const int
   1855     X[4] = {0, 1, 1,-1},
   1856     Y[4] = {1, 0, 1, 1};
   1857 
   1858   CacheView
   1859     *filteredImage_view,
   1860     *image_view;
   1861 
   1862   cl_command_queue
   1863     queue;
   1864 
   1865   cl_int
   1866     clStatus;
   1867 
   1868   cl_kernel
   1869     hullPass1,
   1870     hullPass2;
   1871 
   1872   cl_event
   1873     event;
   1874 
   1875   cl_mem_flags
   1876     mem_flags;
   1877 
   1878   cl_mem
   1879     filteredImageBuffer,
   1880     imageBuffer,
   1881     tempImageBuffer[2];
   1882 
   1883   const void
   1884     *inputPixels;
   1885 
   1886   Image
   1887     *filteredImage;
   1888 
   1889   int
   1890     k,
   1891     matte;
   1892 
   1893   MagickBooleanType
   1894     outputReady;
   1895 
   1896   MagickCLDevice
   1897     device;
   1898 
   1899   MagickSizeType
   1900     length;
   1901 
   1902   size_t
   1903     global_work_size[2];
   1904 
   1905   unsigned int
   1906     imageHeight,
   1907     imageWidth;
   1908 
   1909   void
   1910     *filteredPixels,
   1911     *hostPtr;
   1912 
   1913   outputReady = MagickFalse;
   1914   inputPixels = NULL;
   1915   filteredImage = NULL;
   1916   filteredImage_view = NULL;
   1917   filteredPixels = NULL;
   1918   imageBuffer = NULL;
   1919   filteredImageBuffer = NULL;
   1920   hullPass1 = NULL;
   1921   hullPass2 = NULL;
   1922   queue = NULL;
   1923   tempImageBuffer[0] = tempImageBuffer[1] = NULL;
   1924 
   1925   device = RequestOpenCLDevice(clEnv);
   1926   queue = AcquireOpenCLCommandQueue(device);
   1927 
   1928   image_view=AcquireAuthenticCacheView(image,exception);
   1929   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   1930   if (inputPixels == (void *) NULL)
   1931   {
   1932     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
   1933     goto cleanup;
   1934   }
   1935 
   1936   if (ALIGNED(inputPixels,CLPixelPacket))
   1937   {
   1938     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   1939   }
   1940   else
   1941   {
   1942     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   1943   }
   1944   /* create a CL buffer from image pixel buffer */
   1945   length = image->columns * image->rows;
   1946   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   1947   if (clStatus != CL_SUCCESS)
   1948   {
   1949     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1950     goto cleanup;
   1951   }
   1952 
   1953   mem_flags = CL_MEM_READ_WRITE;
   1954   length = image->columns * image->rows;
   1955   for (k = 0; k < 2; k++)
   1956   {
   1957     tempImageBuffer[k] = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
   1958     if (clStatus != CL_SUCCESS)
   1959     {
   1960       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1961       goto cleanup;
   1962     }
   1963   }
   1964 
   1965   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
   1966   assert(filteredImage != NULL);
   1967   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
   1968   {
   1969     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
   1970     goto cleanup;
   1971   }
   1972   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
   1973   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
   1974   if (filteredPixels == (void *) NULL)
   1975   {
   1976     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
   1977     goto cleanup;
   1978   }
   1979 
   1980   if (ALIGNED(filteredPixels,CLPixelPacket))
   1981   {
   1982     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
   1983     hostPtr = filteredPixels;
   1984   }
   1985   else
   1986   {
   1987     mem_flags = CL_MEM_WRITE_ONLY;
   1988     hostPtr = NULL;
   1989   }
   1990   /* create a CL buffer from image pixel buffer */
   1991   length = image->columns * image->rows;
   1992   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   1993   if (clStatus != CL_SUCCESS)
   1994   {
   1995     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   1996     goto cleanup;
   1997   }
   1998 
   1999   hullPass1 = AcquireOpenCLKernel(device,"HullPass1");
   2000   hullPass2 = AcquireOpenCLKernel(device,"HullPass2");
   2001 
   2002   clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
   2003   clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
   2004   imageWidth = (unsigned int) image->columns;
   2005   clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
   2006   imageHeight = (unsigned int) image->rows;
   2007   clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
   2008   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
   2009   clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
   2010   if (clStatus != CL_SUCCESS)
   2011   {
   2012     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2013     goto cleanup;
   2014   }
   2015 
   2016   clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
   2017   clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
   2018   imageWidth = (unsigned int) image->columns;
   2019   clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
   2020   imageHeight = (unsigned int) image->rows;
   2021   clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
   2022   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
   2023   clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
   2024   if (clStatus != CL_SUCCESS)
   2025   {
   2026     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2027     goto cleanup;
   2028   }
   2029 
   2030 
   2031   global_work_size[0] = image->columns;
   2032   global_work_size[1] = image->rows;
   2033 
   2034 
   2035   for (k = 0; k < 4; k++)
   2036   {
   2037     cl_int2 offset;
   2038     int polarity;
   2039 
   2040 
   2041     offset.s[0] = X[k];
   2042     offset.s[1] = Y[k];
   2043     polarity = 1;
   2044     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
   2045     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
   2046     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
   2047     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
   2048     if (clStatus != CL_SUCCESS)
   2049     {
   2050       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2051       goto cleanup;
   2052     }
   2053     /* launch the kernel */
   2054 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2055     if (clStatus != CL_SUCCESS)
   2056     {
   2057       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2058       goto cleanup;
   2059     }
   2060     RecordProfileData(device,hullPass1,event);
   2061 
   2062     /* launch the kernel */
   2063 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2064     if (clStatus != CL_SUCCESS)
   2065     {
   2066       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2067       goto cleanup;
   2068     }
   2069     RecordProfileData(device,hullPass2,event);
   2070 
   2071     if (k == 0)
   2072       clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
   2073     offset.s[0] = -X[k];
   2074     offset.s[1] = -Y[k];
   2075     polarity = 1;
   2076     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
   2077     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
   2078     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
   2079     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
   2080     if (clStatus != CL_SUCCESS)
   2081     {
   2082       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2083       goto cleanup;
   2084     }
   2085     /* launch the kernel */
   2086 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2087     if (clStatus != CL_SUCCESS)
   2088     {
   2089       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2090       goto cleanup;
   2091     }
   2092     RecordProfileData(device,hullPass1,event);
   2093 
   2094     /* launch the kernel */
   2095 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2096     if (clStatus != CL_SUCCESS)
   2097     {
   2098       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2099       goto cleanup;
   2100     }
   2101     RecordProfileData(device,hullPass2,event);
   2102 
   2103     offset.s[0] = -X[k];
   2104     offset.s[1] = -Y[k];
   2105     polarity = -1;
   2106     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
   2107     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
   2108     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
   2109     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
   2110     if (clStatus != CL_SUCCESS)
   2111     {
   2112       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2113       goto cleanup;
   2114     }
   2115     /* launch the kernel */
   2116 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2117     if (clStatus != CL_SUCCESS)
   2118     {
   2119       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2120       goto cleanup;
   2121     }
   2122     RecordProfileData(device,hullPass1,event);
   2123 
   2124     /* launch the kernel */
   2125 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2126     if (clStatus != CL_SUCCESS)
   2127     {
   2128       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2129       goto cleanup;
   2130     }
   2131     RecordProfileData(device,hullPass2,event);
   2132 
   2133     offset.s[0] = X[k];
   2134     offset.s[1] = Y[k];
   2135     polarity = -1;
   2136     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
   2137     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
   2138     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
   2139     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
   2140 
   2141     if (k == 3)
   2142       clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
   2143 
   2144     if (clStatus != CL_SUCCESS)
   2145     {
   2146       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2147       goto cleanup;
   2148     }
   2149     /* launch the kernel */
   2150 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2151     if (clStatus != CL_SUCCESS)
   2152     {
   2153       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2154       goto cleanup;
   2155     }
   2156     RecordProfileData(device,hullPass1,event);
   2157 
   2158     /* launch the kernel */
   2159 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2160     if (clStatus != CL_SUCCESS)
   2161     {
   2162       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2163       goto cleanup;
   2164     }
   2165     RecordProfileData(device,hullPass2,event);
   2166   }
   2167 
   2168   if (ALIGNED(filteredPixels,CLPixelPacket))
   2169   {
   2170     length = image->columns * image->rows;
   2171     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   2172   }
   2173   else
   2174   {
   2175     length = image->columns * image->rows;
   2176     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   2177   }
   2178   if (clStatus != CL_SUCCESS)
   2179   {
   2180     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   2181     goto cleanup;
   2182   }
   2183 
   2184   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
   2185 
   2186 cleanup:
   2187 
   2188   image_view=DestroyCacheView(image_view);
   2189   if (filteredImage_view != NULL)
   2190     filteredImage_view=DestroyCacheView(filteredImage_view);
   2191 
   2192   if (queue != NULL)
   2193     ReleaseOpenCLCommandQueue(device,queue);
   2194   if (device != NULL)
   2195     ReleaseOpenCLDevice(device);
   2196   if (imageBuffer!=NULL)
   2197     clEnv->library->clReleaseMemObject(imageBuffer);
   2198   for (k = 0; k < 2; k++)
   2199   {
   2200     if (tempImageBuffer[k]!=NULL)
   2201       clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
   2202   }
   2203   if (filteredImageBuffer!=NULL)
   2204     clEnv->library->clReleaseMemObject(filteredImageBuffer);
   2205   if (hullPass1!=NULL)
   2206     ReleaseOpenCLKernel(hullPass1);
   2207   if (hullPass2!=NULL)
   2208     ReleaseOpenCLKernel(hullPass2);
   2209   if (outputReady == MagickFalse && filteredImage != NULL)
   2210     filteredImage=DestroyImage(filteredImage);
   2211 
   2212   return(filteredImage);
   2213 }
   2214 
   2215 MagickPrivate Image *AccelerateDespeckleImage(const Image* image,
   2216   ExceptionInfo* exception)
   2217 {
   2218   Image
   2219     *filteredImage;
   2220 
   2221   MagickCLEnv
   2222     clEnv;
   2223 
   2224   assert(image != NULL);
   2225   assert(exception != (ExceptionInfo *) NULL);
   2226 
   2227   if (checkAccelerateConditionRGBA(image) == MagickFalse)
   2228     return((Image *) NULL);
   2229 
   2230   clEnv=getOpenCLEnvironment(exception);
   2231   if (clEnv == (MagickCLEnv) NULL)
   2232     return((Image *) NULL);
   2233 
   2234   filteredImage=ComputeDespeckleImage(image,clEnv,exception);
   2235   return(filteredImage);
   2236 }
   2237 
   2238 /*
   2239 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2240 %                                                                             %
   2241 %                                                                             %
   2242 %                                                                             %
   2243 %     A c c e l e r a t e E q u a l i z e I m a g e                           %
   2244 %                                                                             %
   2245 %                                                                             %
   2246 %                                                                             %
   2247 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2248 */
   2249 
   2250 static MagickBooleanType ComputeEqualizeImage(Image *image,MagickCLEnv clEnv,
   2251   ExceptionInfo *exception)
   2252 {
   2253 #define EqualizeImageTag  "Equalize/Image"
   2254 
   2255   CacheView
   2256     *image_view;
   2257 
   2258   cl_command_queue
   2259     queue;
   2260 
   2261   cl_int
   2262     clStatus;
   2263 
   2264   cl_mem_flags
   2265     mem_flags;
   2266 
   2267   cl_mem
   2268     equalizeMapBuffer,
   2269     histogramBuffer,
   2270     imageBuffer;
   2271 
   2272   cl_kernel
   2273     equalizeKernel,
   2274     histogramKernel;
   2275 
   2276   cl_event
   2277     event;
   2278 
   2279   cl_uint4
   2280     *histogram;
   2281 
   2282   cl_float4
   2283     white,
   2284     black,
   2285     intensity,
   2286     *map;
   2287 
   2288   MagickBooleanType
   2289     outputReady,
   2290     status;
   2291 
   2292   MagickCLDevice
   2293     device;
   2294 
   2295   MagickSizeType
   2296     length;
   2297 
   2298   PixelPacket
   2299     *equalize_map;
   2300 
   2301   register ssize_t
   2302     i;
   2303 
   2304   size_t
   2305     global_work_size[2];
   2306 
   2307   void
   2308     *hostPtr,
   2309     *inputPixels;
   2310 
   2311   map=NULL;
   2312   histogram=NULL;
   2313   equalize_map=NULL;
   2314   inputPixels = NULL;
   2315   imageBuffer = NULL;
   2316   histogramBuffer = NULL;
   2317   equalizeMapBuffer = NULL;
   2318   histogramKernel = NULL;
   2319   equalizeKernel = NULL;
   2320   queue = NULL;
   2321   outputReady = MagickFalse;
   2322 
   2323   assert(image != (Image *) NULL);
   2324   assert(image->signature == MagickCoreSignature);
   2325   if (image->debug != MagickFalse)
   2326     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
   2327 
   2328   /*
   2329    * initialize opencl env
   2330    */
   2331   device = RequestOpenCLDevice(clEnv);
   2332   queue = AcquireOpenCLCommandQueue(device);
   2333 
   2334   /*
   2335     Allocate and initialize histogram arrays.
   2336   */
   2337   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
   2338   if (histogram == (cl_uint4 *) NULL)
   2339       ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
   2340 
   2341   /* reset histogram */
   2342   (void) memset(histogram,0,(MaxMap+1)*sizeof(*histogram));
   2343 
   2344   /* Create and initialize OpenCL buffers. */
   2345   /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
   2346   /* assume this  will get a writable image */
   2347   image_view=AcquireAuthenticCacheView(image,exception);
   2348   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   2349 
   2350   if (inputPixels == (void *) NULL)
   2351   {
   2352     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
   2353     goto cleanup;
   2354   }
   2355   /* If the host pointer is aligned to the size of CLPixelPacket,
   2356      then use the host buffer directly from the GPU; otherwise,
   2357      create a buffer on the GPU and copy the data over */
   2358   if (ALIGNED(inputPixels,CLPixelPacket))
   2359   {
   2360     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   2361   }
   2362   else
   2363   {
   2364     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   2365   }
   2366   /* create a CL buffer from image pixel buffer */
   2367   length = image->columns * image->rows;
   2368   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   2369   if (clStatus != CL_SUCCESS)
   2370   {
   2371     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   2372     goto cleanup;
   2373   }
   2374 
   2375   /* If the host pointer is aligned to the size of cl_uint,
   2376      then use the host buffer directly from the GPU; otherwise,
   2377      create a buffer on the GPU and copy the data over */
   2378   if (ALIGNED(histogram,cl_uint4))
   2379   {
   2380     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
   2381     hostPtr = histogram;
   2382   }
   2383   else
   2384   {
   2385     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   2386     hostPtr = histogram;
   2387   }
   2388   /* create a CL buffer for histogram  */
   2389   length = (MaxMap+1);
   2390   histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
   2391   if (clStatus != CL_SUCCESS)
   2392   {
   2393     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   2394     goto cleanup;
   2395   }
   2396 
   2397   status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask, exception);
   2398   if (status == MagickFalse)
   2399     goto cleanup;
   2400 
   2401   /* read from the kenel output */
   2402   if (ALIGNED(histogram,cl_uint4))
   2403   {
   2404     length = (MaxMap+1);
   2405     clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
   2406   }
   2407   else
   2408   {
   2409     length = (MaxMap+1);
   2410     clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
   2411   }
   2412   if (clStatus != CL_SUCCESS)
   2413   {
   2414     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   2415     goto cleanup;
   2416   }
   2417 
   2418   /* unmap, don't block gpu to use this buffer again.  */
   2419   if (ALIGNED(histogram,cl_uint4))
   2420   {
   2421     clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
   2422     if (clStatus != CL_SUCCESS)
   2423     {
   2424       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
   2425       goto cleanup;
   2426     }
   2427   }
   2428 
   2429   /* recreate input buffer later, in case image updated */
   2430 #ifdef RECREATEBUFFER
   2431   if (imageBuffer!=NULL)
   2432     clEnv->library->clReleaseMemObject(imageBuffer);
   2433 #endif
   2434 
   2435   /* CPU stuff */
   2436   equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
   2437   if (equalize_map == (PixelPacket *) NULL)
   2438     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
   2439 
   2440   map=(cl_float4 *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
   2441   if (map == (cl_float4 *) NULL)
   2442     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
   2443 
   2444   /*
   2445     Integrate the histogram to get the equalization map.
   2446   */
   2447   (void) memset(&intensity,0,sizeof(intensity));
   2448   for (i=0; i <= (ssize_t) MaxMap; i++)
   2449   {
   2450     if ((image->channel_mask & SyncChannels) != 0)
   2451     {
   2452       intensity.x+=histogram[i].s[2];
   2453       map[i]=intensity;
   2454       continue;
   2455     }
   2456     if ((image->channel_mask & RedChannel) != 0)
   2457       intensity.x+=histogram[i].s[2];
   2458     if ((image->channel_mask & GreenChannel) != 0)
   2459       intensity.y+=histogram[i].s[1];
   2460     if ((image->channel_mask & BlueChannel) != 0)
   2461       intensity.z+=histogram[i].s[0];
   2462     if ((image->channel_mask & AlphaChannel) != 0)
   2463       intensity.w+=histogram[i].s[3];
   2464     map[i]=intensity;
   2465   }
   2466   black=map[0];
   2467   white=map[(int) MaxMap];
   2468   (void) memset(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
   2469   for (i=0; i <= (ssize_t) MaxMap; i++)
   2470   {
   2471     if ((image->channel_mask & SyncChannels) != 0)
   2472     {
   2473       if (white.x != black.x)
   2474         equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
   2475                 (map[i].x-black.x))/(white.x-black.x)));
   2476       continue;
   2477     }
   2478     if (((image->channel_mask & RedChannel) != 0) && (white.x != black.x))
   2479       equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
   2480               (map[i].x-black.x))/(white.x-black.x)));
   2481     if (((image->channel_mask & GreenChannel) != 0) && (white.y != black.y))
   2482       equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
   2483               (map[i].y-black.y))/(white.y-black.y)));
   2484     if (((image->channel_mask & BlueChannel) != 0) && (white.z != black.z))
   2485       equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
   2486               (map[i].z-black.z))/(white.z-black.z)));
   2487     if (((image->channel_mask & AlphaChannel) != 0) && (white.w != black.w))
   2488       equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
   2489               (map[i].w-black.w))/(white.w-black.w)));
   2490   }
   2491 
   2492   if (image->storage_class == PseudoClass)
   2493   {
   2494     /*
   2495        Equalize colormap.
   2496        */
   2497     for (i=0; i < (ssize_t) image->colors; i++)
   2498     {
   2499       if ((image->channel_mask & SyncChannels) != 0)
   2500       {
   2501         if (white.x != black.x)
   2502         {
   2503           image->colormap[i].red=equalize_map[
   2504             ScaleQuantumToMap(image->colormap[i].red)].red;
   2505           image->colormap[i].green=equalize_map[
   2506             ScaleQuantumToMap(image->colormap[i].green)].red;
   2507           image->colormap[i].blue=equalize_map[
   2508             ScaleQuantumToMap(image->colormap[i].blue)].red;
   2509           image->colormap[i].alpha=equalize_map[
   2510             ScaleQuantumToMap(image->colormap[i].alpha)].red;
   2511         }
   2512         continue;
   2513       }
   2514       if (((image->channel_mask & RedChannel) != 0) && (white.x != black.x))
   2515         image->colormap[i].red=equalize_map[
   2516           ScaleQuantumToMap(image->colormap[i].red)].red;
   2517       if (((image->channel_mask & GreenChannel) != 0) && (white.y != black.y))
   2518         image->colormap[i].green=equalize_map[
   2519           ScaleQuantumToMap(image->colormap[i].green)].green;
   2520       if (((image->channel_mask & BlueChannel) != 0) && (white.z != black.z))
   2521         image->colormap[i].blue=equalize_map[
   2522           ScaleQuantumToMap(image->colormap[i].blue)].blue;
   2523       if (((image->channel_mask & AlphaChannel) != 0) && (white.w != black.w))
   2524         image->colormap[i].alpha=equalize_map[
   2525           ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
   2526     }
   2527   }
   2528 
   2529   /*
   2530     Equalize image.
   2531   */
   2532 
   2533   /* GPU can work on this again, image and equalize map as input
   2534     image:        uchar4 (CLPixelPacket)
   2535     equalize_map: uchar4 (PixelPacket)
   2536     black, white: float4 (FloatPixelPacket) */
   2537 
   2538 #ifdef RECREATEBUFFER
   2539   /* If the host pointer is aligned to the size of CLPixelPacket,
   2540      then use the host buffer directly from the GPU; otherwise,
   2541      create a buffer on the GPU and copy the data over */
   2542   if (ALIGNED(inputPixels,CLPixelPacket))
   2543   {
   2544     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
   2545   }
   2546   else
   2547   {
   2548     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   2549   }
   2550   /* create a CL buffer from image pixel buffer */
   2551   length = image->columns * image->rows;
   2552   imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   2553   if (clStatus != CL_SUCCESS)
   2554   {
   2555     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   2556     goto cleanup;
   2557   }
   2558 #endif
   2559 
   2560   /* Create and initialize OpenCL buffers. */
   2561   if (ALIGNED(equalize_map, PixelPacket))
   2562   {
   2563     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   2564     hostPtr = equalize_map;
   2565   }
   2566   else
   2567   {
   2568     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   2569     hostPtr = equalize_map;
   2570   }
   2571   /* create a CL buffer for eqaulize_map  */
   2572   length = (MaxMap+1);
   2573   equalizeMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
   2574   if (clStatus != CL_SUCCESS)
   2575   {
   2576     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   2577     goto cleanup;
   2578   }
   2579 
   2580   /* get the OpenCL kernel */
   2581   equalizeKernel = AcquireOpenCLKernel(device,"Equalize");
   2582   if (equalizeKernel == NULL)
   2583   {
   2584     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   2585     goto cleanup;
   2586   }
   2587 
   2588   /* set the kernel arguments */
   2589   i = 0;
   2590   clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   2591   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&image->channel_mask);
   2592   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
   2593   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&white);
   2594   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&black);
   2595   if (clStatus != CL_SUCCESS)
   2596   {
   2597     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   2598     goto cleanup;
   2599   }
   2600 
   2601   /* launch the kernel */
   2602   global_work_size[0] = image->columns;
   2603   global_work_size[1] = image->rows;
   2604 
   2605   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   2606 
   2607   if (clStatus != CL_SUCCESS)
   2608   {
   2609     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   2610     goto cleanup;
   2611   }
   2612   RecordProfileData(device,equalizeKernel,event);
   2613 
   2614   /* read the data back */
   2615   if (ALIGNED(inputPixels,CLPixelPacket))
   2616   {
   2617     length = image->columns * image->rows;
   2618     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   2619   }
   2620   else
   2621   {
   2622     length = image->columns * image->rows;
   2623     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
   2624   }
   2625   if (clStatus != CL_SUCCESS)
   2626   {
   2627     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   2628     goto cleanup;
   2629   }
   2630 
   2631   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
   2632 
   2633 cleanup:
   2634 
   2635   image_view=DestroyCacheView(image_view);
   2636 
   2637   if (imageBuffer!=NULL)
   2638     clEnv->library->clReleaseMemObject(imageBuffer);
   2639   if (map!=NULL)
   2640     map=(cl_float4 *) RelinquishMagickMemory(map);
   2641   if (equalizeMapBuffer!=NULL)
   2642     clEnv->library->clReleaseMemObject(equalizeMapBuffer);
   2643   if (equalize_map!=NULL)
   2644     equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
   2645   if (histogramBuffer!=NULL)
   2646     clEnv->library->clReleaseMemObject(histogramBuffer);
   2647   if (histogram!=NULL)
   2648     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
   2649   if (histogramKernel!=NULL)
   2650     ReleaseOpenCLKernel(histogramKernel);
   2651   if (equalizeKernel!=NULL)
   2652     ReleaseOpenCLKernel(equalizeKernel);
   2653   if (queue != NULL)
   2654     ReleaseOpenCLCommandQueue(device, queue);
   2655   if (device != NULL)
   2656     ReleaseOpenCLDevice(device);
   2657 
   2658   return(outputReady);
   2659 }
   2660 
   2661 MagickPrivate MagickBooleanType AccelerateEqualizeImage(Image *image,
   2662   ExceptionInfo *exception)
   2663 {
   2664   MagickBooleanType
   2665     status;
   2666 
   2667   MagickCLEnv
   2668     clEnv;
   2669 
   2670   assert(image != NULL);
   2671   assert(exception != (ExceptionInfo *) NULL);
   2672 
   2673   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
   2674       (checkHistogramCondition(image,image->intensity) == MagickFalse))
   2675     return(MagickFalse);
   2676 
   2677   clEnv=getOpenCLEnvironment(exception);
   2678   if (clEnv == (MagickCLEnv) NULL)
   2679     return(MagickFalse);
   2680 
   2681   status=ComputeEqualizeImage(image,clEnv,exception);
   2682   return(status);
   2683 }
   2684 
   2685 /*
   2686 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2687 %                                                                             %
   2688 %                                                                             %
   2689 %                                                                             %
   2690 %     A c c e l e r a t e F u n c t i o n I m a g e                           %
   2691 %                                                                             %
   2692 %                                                                             %
   2693 %                                                                             %
   2694 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2695 */
   2696 
   2697 static MagickBooleanType ComputeFunctionImage(Image *image,MagickCLEnv clEnv,
   2698   const MagickFunction function,const size_t number_parameters,
   2699   const double *parameters,ExceptionInfo *exception)
   2700 {
   2701   cl_command_queue
   2702     queue;
   2703 
   2704   cl_int
   2705     status;
   2706 
   2707   cl_kernel
   2708     functionKernel;
   2709 
   2710   cl_mem
   2711     imageBuffer,
   2712     parametersBuffer;
   2713 
   2714   cl_uint
   2715     number_params,
   2716     number_channels;
   2717 
   2718   float
   2719     *parametersBufferPtr;
   2720 
   2721   MagickBooleanType
   2722     outputReady;
   2723 
   2724   MagickCLDevice
   2725     device;
   2726 
   2727   size_t
   2728     gsize[2],
   2729     i;
   2730 
   2731   outputReady=MagickFalse;
   2732 
   2733   imageBuffer=NULL;
   2734   functionKernel=NULL;
   2735   parametersBuffer=NULL;
   2736 
   2737   device=RequestOpenCLDevice(clEnv);
   2738   queue=AcquireOpenCLCommandQueue(device);
   2739   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   2740   if (imageBuffer == (cl_mem) NULL)
   2741     goto cleanup;
   2742 
   2743   parametersBufferPtr=(float *) AcquireQuantumMemory(number_parameters,
   2744     sizeof(float));
   2745   if (parametersBufferPtr == (float *) NULL)
   2746     goto cleanup;
   2747   for (i=0; i<number_parameters; i++)
   2748     parametersBufferPtr[i]=(float) parameters[i];
   2749   parametersBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
   2750     CL_MEM_COPY_HOST_PTR,number_parameters*sizeof(*parametersBufferPtr),
   2751     parametersBufferPtr);
   2752   parametersBufferPtr=RelinquishMagickMemory(parametersBufferPtr);
   2753   if (parametersBuffer == (cl_mem) NULL)
   2754   {
   2755     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   2756       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
   2757     goto cleanup;
   2758   }
   2759 
   2760   functionKernel=AcquireOpenCLKernel(device,"ComputeFunction");
   2761   if (functionKernel == (cl_kernel) NULL)
   2762   {
   2763     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   2764       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   2765     goto cleanup;
   2766   }
   2767 
   2768   number_channels=(cl_uint) image->number_channels;
   2769   number_params=(cl_uint) number_parameters;
   2770 
   2771   i=0;
   2772   status =SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   2773   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_channels);
   2774   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
   2775   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(MagickFunction),(void *)&function);
   2776   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_params);
   2777   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
   2778   if (status != CL_SUCCESS)
   2779   {
   2780     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   2781       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
   2782     goto cleanup;
   2783   }
   2784 
   2785   gsize[0]=image->columns;
   2786   gsize[1]=image->rows;
   2787   outputReady=EnqueueOpenCLKernel(queue,functionKernel,2,(const size_t *) NULL,
   2788     gsize,(const size_t *) NULL,image,(const Image *) NULL,MagickFalse,
   2789     exception);
   2790 
   2791 cleanup:
   2792 
   2793   if (imageBuffer != (cl_mem) NULL)
   2794     ReleaseOpenCLMemObject(imageBuffer);
   2795   if (parametersBuffer != (cl_mem) NULL)
   2796     ReleaseOpenCLMemObject(parametersBuffer);
   2797   if (functionKernel != (cl_kernel) NULL)
   2798     ReleaseOpenCLKernel(functionKernel);
   2799   if (queue != (cl_command_queue) NULL)
   2800     ReleaseOpenCLCommandQueue(device,queue);
   2801   if (device != (MagickCLDevice) NULL)
   2802     ReleaseOpenCLDevice(device);
   2803   return(outputReady);
   2804 }
   2805 
   2806 MagickPrivate MagickBooleanType AccelerateFunctionImage(Image *image,
   2807   const MagickFunction function,const size_t number_parameters,
   2808   const double *parameters,ExceptionInfo *exception)
   2809 {
   2810   MagickBooleanType
   2811     status;
   2812 
   2813   MagickCLEnv
   2814     clEnv;
   2815 
   2816   assert(image != NULL);
   2817   assert(exception != (ExceptionInfo *) NULL);
   2818 
   2819   if (checkAccelerateCondition(image) == MagickFalse)
   2820     return(MagickFalse);
   2821 
   2822   clEnv=getOpenCLEnvironment(exception);
   2823   if (clEnv == (MagickCLEnv) NULL)
   2824     return(MagickFalse);
   2825 
   2826   status=ComputeFunctionImage(image,clEnv,function,number_parameters,
   2827     parameters,exception);
   2828   return(status);
   2829 }
   2830 
   2831 /*
   2832 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2833 %                                                                             %
   2834 %                                                                             %
   2835 %                                                                             %
   2836 %     A c c e l e r a t e G r a y s c a l e I m a g e                         %
   2837 %                                                                             %
   2838 %                                                                             %
   2839 %                                                                             %
   2840 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2841 */
   2842 
   2843 static MagickBooleanType ComputeGrayscaleImage(Image *image,MagickCLEnv clEnv,
   2844   const PixelIntensityMethod method,ExceptionInfo *exception)
   2845 {
   2846   cl_command_queue
   2847     queue;
   2848 
   2849   cl_int
   2850     status;
   2851 
   2852   cl_kernel
   2853     grayscaleKernel;
   2854 
   2855   cl_mem
   2856     imageBuffer;
   2857 
   2858   cl_uint
   2859     number_channels,
   2860     colorspace,
   2861     intensityMethod;
   2862 
   2863   MagickBooleanType
   2864     outputReady;
   2865 
   2866   MagickCLDevice
   2867     device;
   2868 
   2869   size_t
   2870     gsize[2],
   2871     i;
   2872 
   2873   outputReady=MagickFalse;
   2874   imageBuffer=NULL;
   2875   grayscaleKernel=NULL;
   2876 
   2877   assert(image != (Image *) NULL);
   2878   assert(image->signature == MagickCoreSignature);
   2879   device=RequestOpenCLDevice(clEnv);
   2880   queue=AcquireOpenCLCommandQueue(device);
   2881   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   2882   if (imageBuffer == (cl_mem) NULL)
   2883     goto cleanup;
   2884 
   2885   grayscaleKernel=AcquireOpenCLKernel(device,"Grayscale");
   2886   if (grayscaleKernel == (cl_kernel) NULL)
   2887   {
   2888     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   2889       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   2890     goto cleanup;
   2891   }
   2892 
   2893   number_channels=(cl_uint) image->number_channels;
   2894   intensityMethod=(cl_uint) method;
   2895   colorspace=(cl_uint) image->colorspace;
   2896 
   2897   i=0;
   2898   status =SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   2899   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&number_channels);
   2900   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&colorspace);
   2901   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&intensityMethod);
   2902   if (status != CL_SUCCESS)
   2903   {
   2904     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   2905       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
   2906     goto cleanup;
   2907   }
   2908 
   2909   gsize[0]=image->columns;
   2910   gsize[1]=image->rows;
   2911   outputReady=EnqueueOpenCLKernel(queue,grayscaleKernel,2,
   2912     (const size_t *) NULL,gsize,(const size_t *) NULL,image,(Image *) NULL,
   2913     MagickFalse,exception);
   2914 
   2915 cleanup:
   2916 
   2917   if (imageBuffer != (cl_mem) NULL)
   2918     ReleaseOpenCLMemObject(imageBuffer);
   2919   if (grayscaleKernel != (cl_kernel) NULL)
   2920     ReleaseOpenCLKernel(grayscaleKernel);
   2921   if (queue != (cl_command_queue) NULL)
   2922     ReleaseOpenCLCommandQueue(device,queue);
   2923   if (device != (MagickCLDevice) NULL)
   2924     ReleaseOpenCLDevice(device);
   2925 
   2926   return(outputReady);
   2927 }
   2928 
   2929 MagickPrivate MagickBooleanType AccelerateGrayscaleImage(Image* image,
   2930   const PixelIntensityMethod method,ExceptionInfo *exception)
   2931 {
   2932   MagickBooleanType
   2933     status;
   2934 
   2935   MagickCLEnv
   2936     clEnv;
   2937 
   2938   assert(image != NULL);
   2939   assert(exception != (ExceptionInfo *) NULL);
   2940 
   2941   if ((checkAccelerateCondition(image) == MagickFalse) ||
   2942       (checkPixelIntensity(image,method) == MagickFalse))
   2943     return(MagickFalse);
   2944 
   2945   if (image->number_channels < 3)
   2946     return(MagickFalse);
   2947 
   2948   if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
   2949       (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
   2950       (GetPixelBlueTraits(image) == UndefinedPixelTrait))
   2951     return(MagickFalse);
   2952 
   2953   clEnv=getOpenCLEnvironment(exception);
   2954   if (clEnv == (MagickCLEnv) NULL)
   2955     return(MagickFalse);
   2956 
   2957   status=ComputeGrayscaleImage(image,clEnv,method,exception);
   2958   return(status);
   2959 }
   2960 
   2961 /*
   2962 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2963 %                                                                             %
   2964 %                                                                             %
   2965 %                                                                             %
   2966 %     A c c e l e r a t e L o c a l C o n t r a s t I m a g e                 %
   2967 %                                                                             %
   2968 %                                                                             %
   2969 %                                                                             %
   2970 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2971 */
   2972 
   2973 static Image *ComputeLocalContrastImage(const Image *image,MagickCLEnv clEnv,
   2974   const double radius,const double strength,ExceptionInfo *exception)
   2975 {
   2976   CacheView
   2977     *filteredImage_view,
   2978     *image_view;
   2979 
   2980   cl_command_queue
   2981     queue;
   2982 
   2983   cl_int
   2984     clStatus,
   2985     iRadius;
   2986 
   2987   cl_kernel
   2988     blurRowKernel,
   2989     blurColumnKernel;
   2990 
   2991   cl_event
   2992     event;
   2993 
   2994   cl_mem
   2995     filteredImageBuffer,
   2996     imageBuffer,
   2997     imageKernelBuffer,
   2998     tempImageBuffer;
   2999 
   3000   cl_mem_flags
   3001     mem_flags;
   3002 
   3003   const void
   3004     *inputPixels;
   3005 
   3006   Image
   3007     *filteredImage;
   3008 
   3009   MagickBooleanType
   3010     outputReady;
   3011 
   3012   MagickCLDevice
   3013     device;
   3014 
   3015   MagickSizeType
   3016     length;
   3017 
   3018   void
   3019     *filteredPixels,
   3020     *hostPtr;
   3021 
   3022   unsigned int
   3023     i,
   3024     imageColumns,
   3025     imageRows,
   3026     passes;
   3027 
   3028   filteredImage = NULL;
   3029   filteredImage_view = NULL;
   3030   imageBuffer = NULL;
   3031   filteredImageBuffer = NULL;
   3032   tempImageBuffer = NULL;
   3033   imageKernelBuffer = NULL;
   3034   blurRowKernel = NULL;
   3035   blurColumnKernel = NULL;
   3036   queue = NULL;
   3037   outputReady = MagickFalse;
   3038 
   3039   device = RequestOpenCLDevice(clEnv);
   3040   queue = AcquireOpenCLCommandQueue(device);
   3041 
   3042   /* Create and initialize OpenCL buffers. */
   3043   {
   3044     image_view=AcquireAuthenticCacheView(image,exception);
   3045     inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   3046     if (inputPixels == (const void *) NULL)
   3047     {
   3048       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
   3049       goto cleanup;
   3050     }
   3051 
   3052     /* If the host pointer is aligned to the size of CLPixelPacket,
   3053      then use the host buffer directly from the GPU; otherwise,
   3054      create a buffer on the GPU and copy the data over */
   3055     if (ALIGNED(inputPixels,CLPixelPacket))
   3056     {
   3057       mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   3058     }
   3059     else
   3060     {
   3061       mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   3062     }
   3063     /* create a CL buffer from image pixel buffer */
   3064     length = image->columns * image->rows;
   3065     imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   3066     if (clStatus != CL_SUCCESS)
   3067     {
   3068       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   3069       goto cleanup;
   3070     }
   3071   }
   3072 
   3073   /* create output */
   3074   {
   3075     filteredImage = CloneImage(image,0,0,MagickTrue,exception);
   3076     assert(filteredImage != NULL);
   3077     if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
   3078     {
   3079       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
   3080       goto cleanup;
   3081     }
   3082     filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
   3083     filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
   3084     if (filteredPixels == (void *) NULL)
   3085     {
   3086       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
   3087       goto cleanup;
   3088     }
   3089 
   3090     if (ALIGNED(filteredPixels,CLPixelPacket))
   3091     {
   3092       mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
   3093       hostPtr = filteredPixels;
   3094     }
   3095     else
   3096     {
   3097       mem_flags = CL_MEM_WRITE_ONLY;
   3098       hostPtr = NULL;
   3099     }
   3100 
   3101     /* create a CL buffer from image pixel buffer */
   3102     length = image->columns * image->rows;
   3103     filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   3104     if (clStatus != CL_SUCCESS)
   3105     {
   3106       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   3107       goto cleanup;
   3108     }
   3109   }
   3110 
   3111   {
   3112     /* create temp buffer */
   3113     {
   3114       length = image->columns * image->rows;
   3115       tempImageBuffer = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
   3116       if (clStatus != CL_SUCCESS)
   3117       {
   3118         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   3119         goto cleanup;
   3120       }
   3121     }
   3122 
   3123     /* get the opencl kernel */
   3124     {
   3125       blurRowKernel = AcquireOpenCLKernel(device,"LocalContrastBlurRow");
   3126       if (blurRowKernel == NULL)
   3127       {
   3128         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   3129         goto cleanup;
   3130       };
   3131 
   3132       blurColumnKernel = AcquireOpenCLKernel(device,"LocalContrastBlurApplyColumn");
   3133       if (blurColumnKernel == NULL)
   3134       {
   3135         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   3136         goto cleanup;
   3137       };
   3138     }
   3139 
   3140     {
   3141       imageColumns = (unsigned int) image->columns;
   3142       imageRows = (unsigned int) image->rows;
   3143       iRadius = (cl_int) (image->rows > image->columns ? image->rows : image->columns) * 0.002f * fabs(radius); /* Normalized radius, 100% gives blur radius of 20% of the largest dimension */
   3144 
   3145       passes = (((1.0f * imageRows) * imageColumns * iRadius) + 3999999999) / 4000000000.0f;
   3146       passes = (passes < 1) ? 1: passes;
   3147 
   3148       /* set the kernel arguments */
   3149       i = 0;
   3150       clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   3151       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   3152       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
   3153       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
   3154       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
   3155       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
   3156 
   3157       if (clStatus != CL_SUCCESS)
   3158       {
   3159         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   3160         goto cleanup;
   3161       }
   3162     }
   3163 
   3164     /* launch the kernel */
   3165     {
   3166       int x;
   3167       for (x = 0; x < passes; ++x) {
   3168         size_t gsize[2];
   3169         size_t wsize[2];
   3170         size_t goffset[2];
   3171 
   3172         gsize[0] = 256;
   3173         gsize[1] = (image->rows + passes - 1) / passes;
   3174         wsize[0] = 256;
   3175         wsize[1] = 1;
   3176         goffset[0] = 0;
   3177         goffset[1] = x * gsize[1];
   3178 
   3179         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
   3180         if (clStatus != CL_SUCCESS)
   3181         {
   3182           (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   3183           goto cleanup;
   3184         }
   3185         clEnv->library->clFlush(queue);
   3186         RecordProfileData(device,blurRowKernel,event);
   3187       }
   3188     }
   3189 
   3190     {
   3191       cl_float FStrength = strength;
   3192       i = 0;
   3193       clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   3194       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   3195       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
   3196       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
   3197       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
   3198       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
   3199       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
   3200 
   3201       if (clStatus != CL_SUCCESS)
   3202       {
   3203         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   3204         goto cleanup;
   3205       }
   3206     }
   3207 
   3208     /* launch the kernel */
   3209     {
   3210       int x;
   3211       for (x = 0; x < passes; ++x) {
   3212         size_t gsize[2];
   3213         size_t wsize[2];
   3214         size_t goffset[2];
   3215 
   3216         gsize[0] = ((image->columns + 3) / 4) * 4;
   3217         gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
   3218         wsize[0] = 4;
   3219         wsize[1] = 64;
   3220         goffset[0] = 0;
   3221         goffset[1] = x * gsize[1];
   3222 
   3223         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
   3224         if (clStatus != CL_SUCCESS)
   3225         {
   3226           (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   3227           goto cleanup;
   3228         }
   3229         clEnv->library->clFlush(queue);
   3230         RecordProfileData(device,blurColumnKernel,event);
   3231       }
   3232     }
   3233   }
   3234 
   3235   /* get result */
   3236   if (ALIGNED(filteredPixels,CLPixelPacket))
   3237   {
   3238     length = image->columns * image->rows;
   3239     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   3240   }
   3241   else
   3242   {
   3243     length = image->columns * image->rows;
   3244     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   3245   }
   3246   if (clStatus != CL_SUCCESS)
   3247   {
   3248     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   3249     goto cleanup;
   3250   }
   3251 
   3252   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
   3253 
   3254 cleanup:
   3255 
   3256   image_view=DestroyCacheView(image_view);
   3257   if (filteredImage_view != NULL)
   3258     filteredImage_view=DestroyCacheView(filteredImage_view);
   3259 
   3260   if (imageBuffer!=NULL)
   3261     clEnv->library->clReleaseMemObject(imageBuffer);
   3262   if (filteredImageBuffer!=NULL)
   3263     clEnv->library->clReleaseMemObject(filteredImageBuffer);
   3264   if (tempImageBuffer!=NULL)
   3265     clEnv->library->clReleaseMemObject(tempImageBuffer);
   3266   if (imageKernelBuffer!=NULL)
   3267     clEnv->library->clReleaseMemObject(imageKernelBuffer);
   3268   if (blurRowKernel!=NULL)
   3269     ReleaseOpenCLKernel(blurRowKernel);
   3270   if (blurColumnKernel!=NULL)
   3271     ReleaseOpenCLKernel(blurColumnKernel);
   3272   if (queue != NULL)
   3273     ReleaseOpenCLCommandQueue(device, queue);
   3274   if (device != NULL)
   3275     ReleaseOpenCLDevice(device);
   3276   if (outputReady == MagickFalse)
   3277   {
   3278     if (filteredImage != NULL)
   3279     {
   3280       DestroyImage(filteredImage);
   3281       filteredImage = NULL;
   3282     }
   3283   }
   3284 
   3285   return(filteredImage);
   3286 }
   3287 
   3288 MagickPrivate Image *AccelerateLocalContrastImage(const Image *image,
   3289   const double radius,const double strength,ExceptionInfo *exception)
   3290 {
   3291   Image
   3292     *filteredImage;
   3293 
   3294   MagickCLEnv
   3295     clEnv;
   3296 
   3297   assert(image != NULL);
   3298   assert(exception != (ExceptionInfo *) NULL);
   3299 
   3300   if (checkAccelerateConditionRGBA(image) == MagickFalse)
   3301     return((Image *) NULL);
   3302 
   3303   clEnv=getOpenCLEnvironment(exception);
   3304   if (clEnv == (MagickCLEnv) NULL)
   3305     return((Image *) NULL);
   3306 
   3307   filteredImage=ComputeLocalContrastImage(image,clEnv,radius,strength,
   3308     exception);
   3309   return(filteredImage);
   3310 }
   3311 
   3312 /*
   3313 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3314 %                                                                             %
   3315 %                                                                             %
   3316 %                                                                             %
   3317 %     A c c e l e r a t e M o d u l a t e I m a g e                           %
   3318 %                                                                             %
   3319 %                                                                             %
   3320 %                                                                             %
   3321 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3322 */
   3323 
   3324 static MagickBooleanType ComputeModulateImage(Image *image,MagickCLEnv clEnv,
   3325   const double percent_brightness,const double percent_hue,
   3326   const double percent_saturation,const ColorspaceType colorspace,
   3327   ExceptionInfo *exception)
   3328 {
   3329   CacheView
   3330     *image_view;
   3331 
   3332   cl_float
   3333     bright,
   3334     hue,
   3335     saturation;
   3336 
   3337   cl_command_queue
   3338     queue;
   3339 
   3340   cl_int
   3341     color,
   3342     clStatus;
   3343 
   3344   cl_kernel
   3345     modulateKernel;
   3346 
   3347   cl_event
   3348     event;
   3349 
   3350   cl_mem
   3351     imageBuffer;
   3352 
   3353   cl_mem_flags
   3354     mem_flags;
   3355 
   3356   MagickBooleanType
   3357     outputReady;
   3358 
   3359   MagickCLDevice
   3360     device;
   3361 
   3362   MagickSizeType
   3363     length;
   3364 
   3365   register ssize_t
   3366     i;
   3367 
   3368   void
   3369     *inputPixels;
   3370 
   3371   inputPixels = NULL;
   3372   imageBuffer = NULL;
   3373   modulateKernel = NULL;
   3374 
   3375   assert(image != (Image *) NULL);
   3376   assert(image->signature == MagickCoreSignature);
   3377   if (image->debug != MagickFalse)
   3378     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
   3379 
   3380   /*
   3381    * initialize opencl env
   3382    */
   3383   device = RequestOpenCLDevice(clEnv);
   3384   queue = AcquireOpenCLCommandQueue(device);
   3385 
   3386   outputReady = MagickFalse;
   3387 
   3388   /* Create and initialize OpenCL buffers.
   3389    inputPixels = AcquirePixelCachePixels(image, &length, exception);
   3390    assume this  will get a writable image
   3391    */
   3392   image_view=AcquireAuthenticCacheView(image,exception);
   3393   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   3394   if (inputPixels == (void *) NULL)
   3395   {
   3396     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
   3397     goto cleanup;
   3398   }
   3399 
   3400   /* If the host pointer is aligned to the size of CLPixelPacket,
   3401    then use the host buffer directly from the GPU; otherwise,
   3402    create a buffer on the GPU and copy the data over
   3403    */
   3404   if (ALIGNED(inputPixels,CLPixelPacket))
   3405   {
   3406     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
   3407   }
   3408   else
   3409   {
   3410     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   3411   }
   3412   /* create a CL buffer from image pixel buffer */
   3413   length = image->columns * image->rows;
   3414   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   3415   if (clStatus != CL_SUCCESS)
   3416   {
   3417     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
   3418     goto cleanup;
   3419   }
   3420 
   3421   modulateKernel = AcquireOpenCLKernel(device, "Modulate");
   3422   if (modulateKernel == NULL)
   3423   {
   3424     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
   3425     goto cleanup;
   3426   }
   3427 
   3428   bright=percent_brightness;
   3429   hue=percent_hue;
   3430   saturation=percent_saturation;
   3431   color=colorspace;
   3432 
   3433   i = 0;
   3434   clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   3435   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
   3436   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
   3437   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
   3438   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
   3439   if (clStatus != CL_SUCCESS)
   3440   {
   3441     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
   3442     goto cleanup;
   3443   }
   3444 
   3445   {
   3446     size_t global_work_size[2];
   3447     global_work_size[0] = image->columns;
   3448     global_work_size[1] = image->rows;
   3449     /* launch the kernel */
   3450 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
   3451     if (clStatus != CL_SUCCESS)
   3452     {
   3453       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   3454       goto cleanup;
   3455     }
   3456     RecordProfileData(device,modulateKernel,event);
   3457   }
   3458 
   3459   if (ALIGNED(inputPixels,CLPixelPacket))
   3460   {
   3461     length = image->columns * image->rows;
   3462     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   3463   }
   3464   else
   3465   {
   3466     length = image->columns * image->rows;
   3467     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
   3468   }
   3469   if (clStatus != CL_SUCCESS)
   3470   {
   3471     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
   3472     goto cleanup;
   3473   }
   3474 
   3475   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
   3476 
   3477 cleanup:
   3478 
   3479   image_view=DestroyCacheView(image_view);
   3480 
   3481   if (imageBuffer!=NULL)
   3482     clEnv->library->clReleaseMemObject(imageBuffer);
   3483   if (modulateKernel!=NULL)
   3484     ReleaseOpenCLKernel(modulateKernel);
   3485   if (queue != NULL)
   3486     ReleaseOpenCLCommandQueue(device,queue);
   3487   if (device != NULL)
   3488     ReleaseOpenCLDevice(device);
   3489 
   3490   return outputReady;
   3491 
   3492 }
   3493 
   3494 MagickPrivate MagickBooleanType AccelerateModulateImage(Image *image,
   3495   const double percent_brightness,const double percent_hue,
   3496   const double percent_saturation,const ColorspaceType colorspace,
   3497   ExceptionInfo *exception)
   3498 {
   3499   MagickBooleanType
   3500     status;
   3501 
   3502   MagickCLEnv
   3503     clEnv;
   3504 
   3505   assert(image != NULL);
   3506   assert(exception != (ExceptionInfo *) NULL);
   3507 
   3508   if (checkAccelerateConditionRGBA(image) == MagickFalse)
   3509     return(MagickFalse);
   3510 
   3511   if ((colorspace != HSLColorspace) && (colorspace != UndefinedColorspace))
   3512     return(MagickFalse);
   3513 
   3514   clEnv=getOpenCLEnvironment(exception);
   3515   if (clEnv == (MagickCLEnv) NULL)
   3516     return(MagickFalse);
   3517 
   3518   status=ComputeModulateImage(image,clEnv,percent_brightness,percent_hue,
   3519     percent_saturation,colorspace,exception);
   3520   return(status);
   3521 }
   3522 
   3523 /*
   3524 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3525 %                                                                             %
   3526 %                                                                             %
   3527 %                                                                             %
   3528 %     A c c e l e r a t e M o t i o n B l u r I m a g e                       %
   3529 %                                                                             %
   3530 %                                                                             %
   3531 %                                                                             %
   3532 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3533 */
   3534 
   3535 static Image* ComputeMotionBlurImage(const Image *image,MagickCLEnv clEnv,
   3536   const double *kernel,const size_t width,const OffsetInfo *offset,
   3537   ExceptionInfo *exception)
   3538 {
   3539   CacheView
   3540     *filteredImage_view,
   3541     *image_view;
   3542 
   3543   cl_command_queue
   3544     queue;
   3545 
   3546   cl_float4
   3547     biasPixel;
   3548 
   3549   cl_int
   3550     clStatus;
   3551 
   3552   cl_kernel
   3553     motionBlurKernel;
   3554 
   3555   cl_event
   3556     event;
   3557 
   3558   cl_mem
   3559     filteredImageBuffer,
   3560     imageBuffer,
   3561     imageKernelBuffer,
   3562     offsetBuffer;
   3563 
   3564   cl_mem_flags
   3565     mem_flags;
   3566 
   3567   const void
   3568     *inputPixels;
   3569 
   3570   float
   3571     *kernelBufferPtr;
   3572 
   3573   Image
   3574     *filteredImage;
   3575 
   3576   int
   3577     *offsetBufferPtr;
   3578 
   3579   MagickBooleanType
   3580     outputReady;
   3581 
   3582   MagickCLDevice
   3583     device;
   3584 
   3585   PixelInfo
   3586     bias;
   3587 
   3588   MagickSizeType
   3589     length;
   3590 
   3591   size_t
   3592     global_work_size[2],
   3593     local_work_size[2];
   3594 
   3595   unsigned int
   3596     i,
   3597     imageHeight,
   3598     imageWidth,
   3599     matte;
   3600 
   3601   void
   3602     *filteredPixels,
   3603     *hostPtr;
   3604 
   3605   outputReady = MagickFalse;
   3606   filteredImage = NULL;
   3607   filteredImage_view = NULL;
   3608   imageBuffer = NULL;
   3609   filteredImageBuffer = NULL;
   3610   imageKernelBuffer = NULL;
   3611   motionBlurKernel = NULL;
   3612   queue = NULL;
   3613 
   3614   device = RequestOpenCLDevice(clEnv);
   3615 
   3616   /* Create and initialize OpenCL buffers. */
   3617 
   3618   image_view=AcquireAuthenticCacheView(image,exception);
   3619   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   3620   if (inputPixels == (const void *) NULL)
   3621   {
   3622     (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
   3623       "UnableToReadPixelCache.","`%s'",image->filename);
   3624     goto cleanup;
   3625   }
   3626 
   3627   /*
   3628     If the host pointer is aligned to the size of CLPixelPacket, then use
   3629     the host buffer directly from the GPU; otherwise, create a buffer on
   3630     the GPU and copy the data over
   3631   */
   3632   if (ALIGNED(inputPixels,CLPixelPacket))
   3633   {
   3634     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   3635   }
   3636   else
   3637   {
   3638     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   3639   }
   3640   /*
   3641     create a CL buffer from image pixel buffer
   3642   */
   3643   length = image->columns * image->rows;
   3644   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
   3645     length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   3646   if (clStatus != CL_SUCCESS)
   3647   {
   3648     (void) ThrowMagickException(exception, GetMagickModule(),
   3649       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
   3650     goto cleanup;
   3651   }
   3652 
   3653 
   3654   filteredImage = CloneImage(image,image->columns,image->rows,
   3655     MagickTrue,exception);
   3656   assert(filteredImage != NULL);
   3657   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
   3658   {
   3659     (void) ThrowMagickException(exception, GetMagickModule(),
   3660       ResourceLimitError, "CloneImage failed.", ".");
   3661     goto cleanup;
   3662   }
   3663   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
   3664   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
   3665   if (filteredPixels == (void *) NULL)
   3666   {
   3667     (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
   3668       "UnableToReadPixelCache.","`%s'",filteredImage->filename);
   3669     goto cleanup;
   3670   }
   3671 
   3672   if (ALIGNED(filteredPixels,CLPixelPacket))
   3673   {
   3674     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
   3675     hostPtr = filteredPixels;
   3676   }
   3677   else
   3678   {
   3679     mem_flags = CL_MEM_WRITE_ONLY;
   3680     hostPtr = NULL;
   3681   }
   3682   /*
   3683     Create a CL buffer from image pixel buffer.
   3684   */
   3685   length = image->columns * image->rows;
   3686   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
   3687     length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   3688   if (clStatus != CL_SUCCESS)
   3689   {
   3690     (void) ThrowMagickException(exception, GetMagickModule(),
   3691       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
   3692     goto cleanup;
   3693   }
   3694 
   3695 
   3696   imageKernelBuffer = clEnv->library->clCreateBuffer(device->context,
   3697     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
   3698     &clStatus);
   3699   if (clStatus != CL_SUCCESS)
   3700   {
   3701     (void) ThrowMagickException(exception, GetMagickModule(),
   3702       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
   3703     goto cleanup;
   3704   }
   3705 
   3706   queue = AcquireOpenCLCommandQueue(device);
   3707   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
   3708     CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
   3709   if (clStatus != CL_SUCCESS)
   3710   {
   3711     (void) ThrowMagickException(exception, GetMagickModule(),
   3712       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
   3713     goto cleanup;
   3714   }
   3715   for (i = 0; i < width; i++)
   3716   {
   3717     kernelBufferPtr[i] = (float) kernel[i];
   3718   }
   3719   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
   3720     0, NULL, NULL);
   3721  if (clStatus != CL_SUCCESS)
   3722   {
   3723     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
   3724       "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
   3725     goto cleanup;
   3726   }
   3727 
   3728   offsetBuffer = clEnv->library->clCreateBuffer(device->context,
   3729     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
   3730     &clStatus);
   3731   if (clStatus != CL_SUCCESS)
   3732   {
   3733     (void) ThrowMagickException(exception, GetMagickModule(),
   3734       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
   3735     goto cleanup;
   3736   }
   3737 
   3738   offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
   3739     CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
   3740   if (clStatus != CL_SUCCESS)
   3741   {
   3742     (void) ThrowMagickException(exception, GetMagickModule(),
   3743       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
   3744     goto cleanup;
   3745   }
   3746   for (i = 0; i < width; i++)
   3747   {
   3748     offsetBufferPtr[2*i] = (int)offset[i].x;
   3749     offsetBufferPtr[2*i+1] = (int)offset[i].y;
   3750   }
   3751   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
   3752     NULL, NULL);
   3753  if (clStatus != CL_SUCCESS)
   3754   {
   3755     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
   3756       "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
   3757     goto cleanup;
   3758   }
   3759 
   3760 
   3761   /*
   3762     Get the OpenCL kernel
   3763   */
   3764   motionBlurKernel = AcquireOpenCLKernel(device,"MotionBlur");
   3765   if (motionBlurKernel == NULL)
   3766   {
   3767     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
   3768       "AcquireOpenCLKernel failed.", ".");
   3769     goto cleanup;
   3770   }
   3771 
   3772   /*
   3773     Set the kernel arguments.
   3774   */
   3775   i = 0;
   3776   clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
   3777     (void *)&imageBuffer);
   3778   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
   3779     (void *)&filteredImageBuffer);
   3780   imageWidth = (unsigned int) image->columns;
   3781   imageHeight = (unsigned int) image->rows;
   3782   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
   3783     &imageWidth);
   3784   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
   3785     &imageHeight);
   3786   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
   3787     (void *)&imageKernelBuffer);
   3788   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
   3789     &width);
   3790   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
   3791     (void *)&offsetBuffer);
   3792 
   3793   GetPixelInfo(image,&bias);
   3794   biasPixel.s[0] = bias.red;
   3795   biasPixel.s[1] = bias.green;
   3796   biasPixel.s[2] = bias.blue;
   3797   biasPixel.s[3] = bias.alpha;
   3798   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
   3799 
   3800   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
   3801   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
   3802   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
   3803   if (clStatus != CL_SUCCESS)
   3804   {
   3805     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
   3806       "clEnv->library->clSetKernelArg failed.", ".");
   3807     goto cleanup;
   3808   }
   3809 
   3810   /*
   3811     Launch the kernel.
   3812   */
   3813   local_work_size[0] = 16;
   3814   local_work_size[1] = 16;
   3815   global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
   3816                                 (unsigned int) image->columns,(unsigned int) local_work_size[0]);
   3817   global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
   3818                                 (unsigned int) image->rows,(unsigned int) local_work_size[1]);
   3819   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
   3820 	  global_work_size, local_work_size, 0, NULL, &event);
   3821 
   3822   if (clStatus != CL_SUCCESS)
   3823   {
   3824     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
   3825       "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
   3826     goto cleanup;
   3827   }
   3828   RecordProfileData(device,motionBlurKernel,event);
   3829 
   3830   if (ALIGNED(filteredPixels,CLPixelPacket))
   3831   {
   3832     length = image->columns * image->rows;
   3833     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE,
   3834       CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
   3835       NULL, &clStatus);
   3836   }
   3837   else
   3838   {
   3839     length = image->columns * image->rows;
   3840     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0,
   3841       length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   3842   }
   3843   if (clStatus != CL_SUCCESS)
   3844   {
   3845     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
   3846       "Reading output image from CL buffer failed.", ".");
   3847     goto cleanup;
   3848   }
   3849   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
   3850 
   3851 cleanup:
   3852 
   3853   image_view=DestroyCacheView(image_view);
   3854   if (filteredImage_view != NULL)
   3855     filteredImage_view=DestroyCacheView(filteredImage_view);
   3856 
   3857   if (filteredImageBuffer!=NULL)
   3858     clEnv->library->clReleaseMemObject(filteredImageBuffer);
   3859   if (imageBuffer!=NULL)
   3860     clEnv->library->clReleaseMemObject(imageBuffer);
   3861   if (imageKernelBuffer!=NULL)
   3862     clEnv->library->clReleaseMemObject(imageKernelBuffer);
   3863   if (motionBlurKernel!=NULL)
   3864     ReleaseOpenCLKernel(motionBlurKernel);
   3865   if (queue != NULL)
   3866     ReleaseOpenCLCommandQueue(device,queue);
   3867   if (device != NULL)
   3868     ReleaseOpenCLDevice(device);
   3869   if (outputReady == MagickFalse && filteredImage != NULL)
   3870     filteredImage=DestroyImage(filteredImage);
   3871 
   3872   return(filteredImage);
   3873 }
   3874 
   3875 MagickPrivate Image *AccelerateMotionBlurImage(const Image *image,
   3876   const double* kernel,const size_t width,const OffsetInfo *offset,
   3877   ExceptionInfo *exception)
   3878 {
   3879   Image
   3880     *filteredImage;
   3881 
   3882   MagickCLEnv
   3883     clEnv;
   3884 
   3885   assert(image != NULL);
   3886   assert(kernel != (double *) NULL);
   3887   assert(offset != (OffsetInfo *) NULL);
   3888   assert(exception != (ExceptionInfo *) NULL);
   3889 
   3890   if (checkAccelerateConditionRGBA(image) == MagickFalse)
   3891     return((Image *) NULL);
   3892 
   3893   clEnv=getOpenCLEnvironment(exception);
   3894   if (clEnv == (MagickCLEnv) NULL)
   3895     return((Image *) NULL);
   3896 
   3897   filteredImage=ComputeMotionBlurImage(image,clEnv,kernel,width,offset,
   3898     exception);
   3899   return(filteredImage);
   3900 }
   3901 
   3902 /*
   3903 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3904 %                                                                             %
   3905 %                                                                             %
   3906 %                                                                             %
   3907 %     A c c e l e r a t e R e s i z e I m a g e                               %
   3908 %                                                                             %
   3909 %                                                                             %
   3910 %                                                                             %
   3911 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3912 */
   3913 
   3914 static MagickBooleanType resizeHorizontalFilter(MagickCLDevice device,
   3915   cl_command_queue queue,const Image *image,Image *filteredImage,
   3916   cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
   3917   cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
   3918   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
   3919   const float xFactor,ExceptionInfo *exception)
   3920 {
   3921   cl_kernel
   3922     horizontalKernel;
   3923 
   3924   cl_int
   3925     status;
   3926 
   3927   const unsigned int
   3928     workgroupSize = 256;
   3929 
   3930   float
   3931     resizeFilterScale,
   3932     resizeFilterSupport,
   3933     resizeFilterWindowSupport,
   3934     resizeFilterBlur,
   3935     scale,
   3936     support;
   3937 
   3938   int
   3939     cacheRangeStart,
   3940     cacheRangeEnd,
   3941     numCachedPixels,
   3942     resizeFilterType,
   3943     resizeWindowType;
   3944 
   3945   MagickBooleanType
   3946     outputReady;
   3947 
   3948   size_t
   3949     gammaAccumulatorLocalMemorySize,
   3950     gsize[2],
   3951     i,
   3952     imageCacheLocalMemorySize,
   3953     pixelAccumulatorLocalMemorySize,
   3954     lsize[2],
   3955     totalLocalMemorySize,
   3956     weightAccumulatorLocalMemorySize;
   3957 
   3958   unsigned int
   3959     chunkSize,
   3960     pixelPerWorkgroup;
   3961 
   3962   horizontalKernel=NULL;
   3963   outputReady=MagickFalse;
   3964 
   3965   /*
   3966   Apply filter to resize vertically from image to resize image.
   3967   */
   3968   scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
   3969   support=scale*GetResizeFilterSupport(resizeFilter);
   3970   if (support < 0.5)
   3971   {
   3972     /*
   3973     Support too small even for nearest neighbour: Reduce to point
   3974     sampling.
   3975     */
   3976     support=(float) 0.5;
   3977     scale=1.0;
   3978   }
   3979   scale=PerceptibleReciprocal(scale);
   3980 
   3981   if (resizedColumns < workgroupSize)
   3982   {
   3983     chunkSize=32;
   3984     pixelPerWorkgroup=32;
   3985   }
   3986   else
   3987   {
   3988     chunkSize=workgroupSize;
   3989     pixelPerWorkgroup=workgroupSize;
   3990   }
   3991 
   3992 DisableMSCWarning(4127)
   3993   while(1)
   3994 RestoreMSCWarning
   3995   {
   3996     /* calculate the local memory size needed per workgroup */
   3997     cacheRangeStart=(int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
   3998     cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+
   3999       MagickEpsilon)+support+0.5);
   4000     numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
   4001     imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
   4002       number_channels;
   4003     totalLocalMemorySize=imageCacheLocalMemorySize;
   4004 
   4005     /* local size for the pixel accumulator */
   4006     pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
   4007     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
   4008 
   4009     /* local memory size for the weight accumulator */
   4010     weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
   4011     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
   4012 
   4013     /* local memory size for the gamma accumulator */
   4014     if ((number_channels == 4) || (number_channels == 2))
   4015       gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
   4016     else
   4017       gammaAccumulatorLocalMemorySize=sizeof(float);
   4018     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
   4019 
   4020     if (totalLocalMemorySize <= device->local_memory_size)
   4021       break;
   4022     else
   4023     {
   4024       pixelPerWorkgroup=pixelPerWorkgroup/2;
   4025       chunkSize=chunkSize/2;
   4026       if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
   4027       {
   4028         /* quit, fallback to CPU */
   4029         goto cleanup;
   4030       }
   4031     }
   4032   }
   4033 
   4034   resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
   4035   resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
   4036 
   4037   horizontalKernel=AcquireOpenCLKernel(device,"ResizeHorizontalFilter");
   4038   if (horizontalKernel == (cl_kernel) NULL)
   4039   {
   4040     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4041       ResourceLimitWarning,"AcquireOpenCLKernel failed.", ".");
   4042     goto cleanup;
   4043   }
   4044 
   4045   resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
   4046   resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
   4047   resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
   4048   resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
   4049 
   4050   i=0;
   4051   status =SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
   4052   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
   4053   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&columns);
   4054   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&rows);
   4055   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
   4056   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
   4057   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
   4058   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&xFactor);
   4059   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeFilterType);
   4060   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeWindowType);
   4061   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
   4062   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
   4063   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
   4064   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
   4065   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
   4066   status|=SetOpenCLKernelArg(horizontalKernel,i++,imageCacheLocalMemorySize,NULL);
   4067   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),&numCachedPixels);
   4068   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&pixelPerWorkgroup);
   4069   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&chunkSize);
   4070   status|=SetOpenCLKernelArg(horizontalKernel,i++,pixelAccumulatorLocalMemorySize,NULL);
   4071   status|=SetOpenCLKernelArg(horizontalKernel,i++,weightAccumulatorLocalMemorySize,NULL);
   4072   status|=SetOpenCLKernelArg(horizontalKernel,i++,gammaAccumulatorLocalMemorySize,NULL);
   4073 
   4074   if (status != CL_SUCCESS)
   4075   {
   4076     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4077       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
   4078     goto cleanup;
   4079   }
   4080 
   4081   gsize[0]=(resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*
   4082     workgroupSize;
   4083   gsize[1]=resizedRows;
   4084   lsize[0]=workgroupSize;
   4085   lsize[1]=1;
   4086   outputReady=EnqueueOpenCLKernel(queue,horizontalKernel,2,
   4087     (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
   4088     exception);
   4089 
   4090 cleanup:
   4091 
   4092   if (horizontalKernel != (cl_kernel) NULL)
   4093     ReleaseOpenCLKernel(horizontalKernel);
   4094 
   4095   return(outputReady);
   4096 }
   4097 
   4098 static MagickBooleanType resizeVerticalFilter(MagickCLDevice device,
   4099   cl_command_queue queue,const Image *image,Image * filteredImage,
   4100   cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
   4101   cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
   4102   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
   4103   const float yFactor,ExceptionInfo *exception)
   4104 {
   4105   cl_kernel
   4106     verticalKernel;
   4107 
   4108   cl_int
   4109     status;
   4110 
   4111   const unsigned int
   4112     workgroupSize = 256;
   4113 
   4114   float
   4115     resizeFilterScale,
   4116     resizeFilterSupport,
   4117     resizeFilterWindowSupport,
   4118     resizeFilterBlur,
   4119     scale,
   4120     support;
   4121 
   4122   int
   4123     cacheRangeStart,
   4124     cacheRangeEnd,
   4125     numCachedPixels,
   4126     resizeFilterType,
   4127     resizeWindowType;
   4128 
   4129   MagickBooleanType
   4130     outputReady;
   4131 
   4132   size_t
   4133     gammaAccumulatorLocalMemorySize,
   4134     gsize[2],
   4135     i,
   4136     imageCacheLocalMemorySize,
   4137     pixelAccumulatorLocalMemorySize,
   4138     lsize[2],
   4139     totalLocalMemorySize,
   4140     weightAccumulatorLocalMemorySize;
   4141 
   4142   unsigned int
   4143     chunkSize,
   4144     pixelPerWorkgroup;
   4145 
   4146   verticalKernel=NULL;
   4147   outputReady=MagickFalse;
   4148 
   4149   /*
   4150   Apply filter to resize vertically from image to resize image.
   4151   */
   4152   scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
   4153   support=scale*GetResizeFilterSupport(resizeFilter);
   4154   if (support < 0.5)
   4155   {
   4156     /*
   4157     Support too small even for nearest neighbour: Reduce to point
   4158     sampling.
   4159     */
   4160     support=(float) 0.5;
   4161     scale=1.0;
   4162   }
   4163   scale=PerceptibleReciprocal(scale);
   4164 
   4165   if (resizedRows < workgroupSize)
   4166   {
   4167     chunkSize=32;
   4168     pixelPerWorkgroup=32;
   4169   }
   4170   else
   4171   {
   4172     chunkSize=workgroupSize;
   4173     pixelPerWorkgroup=workgroupSize;
   4174   }
   4175 
   4176 DisableMSCWarning(4127)
   4177   while(1)
   4178 RestoreMSCWarning
   4179   {
   4180     /* calculate the local memory size needed per workgroup */
   4181     cacheRangeStart=(int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
   4182     cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+
   4183       MagickEpsilon)+support+0.5);
   4184     numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
   4185     imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
   4186       number_channels;
   4187     totalLocalMemorySize=imageCacheLocalMemorySize;
   4188 
   4189     /* local size for the pixel accumulator */
   4190     pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
   4191     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
   4192 
   4193     /* local memory size for the weight accumulator */
   4194     weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
   4195     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
   4196 
   4197     /* local memory size for the gamma accumulator */
   4198     if ((number_channels == 4) || (number_channels == 2))
   4199       gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
   4200     else
   4201       gammaAccumulatorLocalMemorySize=sizeof(float);
   4202     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
   4203 
   4204     if (totalLocalMemorySize <= device->local_memory_size)
   4205       break;
   4206     else
   4207     {
   4208       pixelPerWorkgroup=pixelPerWorkgroup/2;
   4209       chunkSize=chunkSize/2;
   4210       if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
   4211       {
   4212         /* quit, fallback to CPU */
   4213         goto cleanup;
   4214       }
   4215     }
   4216   }
   4217 
   4218   resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
   4219   resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
   4220 
   4221   verticalKernel=AcquireOpenCLKernel(device,"ResizeVerticalFilter");
   4222   if (verticalKernel == (cl_kernel) NULL)
   4223   {
   4224     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4225       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   4226     goto cleanup;
   4227   }
   4228 
   4229   resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
   4230   resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
   4231   resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
   4232   resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
   4233 
   4234   i=0;
   4235   status =SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
   4236   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
   4237   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&columns);
   4238   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&rows);
   4239   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
   4240   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
   4241   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
   4242   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&yFactor);
   4243   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeFilterType);
   4244   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeWindowType);
   4245   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
   4246   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
   4247   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
   4248   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
   4249   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
   4250   status|=SetOpenCLKernelArg(verticalKernel,i++,imageCacheLocalMemorySize, NULL);
   4251   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int), &numCachedPixels);
   4252   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &pixelPerWorkgroup);
   4253   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &chunkSize);
   4254   status|=SetOpenCLKernelArg(verticalKernel,i++,pixelAccumulatorLocalMemorySize, NULL);
   4255   status|=SetOpenCLKernelArg(verticalKernel,i++,weightAccumulatorLocalMemorySize, NULL);
   4256   status|=SetOpenCLKernelArg(verticalKernel,i++,gammaAccumulatorLocalMemorySize, NULL);
   4257 
   4258   if (status != CL_SUCCESS)
   4259   {
   4260     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4261       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
   4262     goto cleanup;
   4263   }
   4264 
   4265   gsize[0]=resizedColumns;
   4266   gsize[1]=(resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*
   4267     workgroupSize;
   4268   lsize[0]=1;
   4269   lsize[1]=workgroupSize;
   4270   outputReady=EnqueueOpenCLKernel(queue,verticalKernel,2,(const size_t *) NULL,
   4271     gsize,lsize,image,filteredImage,MagickFalse,exception);
   4272 
   4273 cleanup:
   4274 
   4275   if (verticalKernel != (cl_kernel) NULL)
   4276     ReleaseOpenCLKernel(verticalKernel);
   4277 
   4278   return(outputReady);
   4279 }
   4280 
   4281 static Image *ComputeResizeImage(const Image* image,MagickCLEnv clEnv,
   4282   const size_t resizedColumns,const size_t resizedRows,
   4283   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
   4284 {
   4285   cl_command_queue
   4286     queue;
   4287 
   4288   cl_mem
   4289     cubicCoefficientsBuffer,
   4290     filteredImageBuffer,
   4291     imageBuffer,
   4292     tempImageBuffer;
   4293 
   4294   cl_uint
   4295     number_channels;
   4296 
   4297   const double
   4298     *resizeFilterCoefficient;
   4299 
   4300   float
   4301     coefficientBuffer[7],
   4302     xFactor,
   4303     yFactor;
   4304 
   4305   MagickBooleanType
   4306     outputReady;
   4307 
   4308   MagickCLDevice
   4309     device;
   4310 
   4311   MagickSizeType
   4312     length;
   4313 
   4314   Image
   4315     *filteredImage;
   4316 
   4317   size_t
   4318     i;
   4319 
   4320   filteredImage=NULL;
   4321   imageBuffer=NULL;
   4322   filteredImageBuffer=NULL;
   4323   tempImageBuffer=NULL;
   4324   cubicCoefficientsBuffer=NULL;
   4325   outputReady=MagickFalse;
   4326 
   4327   device=RequestOpenCLDevice(clEnv);
   4328   queue=AcquireOpenCLCommandQueue(device);
   4329   filteredImage=CloneImage(image,resizedColumns,resizedRows,MagickTrue,
   4330     exception);
   4331   if (filteredImage == (Image *) NULL)
   4332     goto cleanup;
   4333   if (filteredImage->number_channels != image->number_channels)
   4334     goto cleanup;
   4335   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   4336   if (imageBuffer == (cl_mem) NULL)
   4337     goto cleanup;
   4338   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
   4339   if (filteredImageBuffer == (cl_mem) NULL)
   4340     goto cleanup;
   4341 
   4342   resizeFilterCoefficient=GetResizeFilterCoefficient(resizeFilter);
   4343   for (i = 0; i < 7; i++)
   4344     coefficientBuffer[i]=(float) resizeFilterCoefficient[i];
   4345   cubicCoefficientsBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
   4346     CL_MEM_READ_ONLY,sizeof(coefficientBuffer),&coefficientBuffer);
   4347   if (cubicCoefficientsBuffer == (cl_mem) NULL)
   4348   {
   4349     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4350       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
   4351     goto cleanup;
   4352   }
   4353 
   4354   number_channels=(cl_uint) image->number_channels;
   4355   xFactor=(float) resizedColumns/(float) image->columns;
   4356   yFactor=(float) resizedRows/(float) image->rows;
   4357   if (xFactor > yFactor)
   4358   {
   4359     length=resizedColumns*image->rows*number_channels;
   4360     tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
   4361       sizeof(CLQuantum),(void *) NULL);
   4362     if (tempImageBuffer == (cl_mem) NULL)
   4363     {
   4364       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4365         ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
   4366       goto cleanup;
   4367     }
   4368 
   4369     outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
   4370       imageBuffer,number_channels,(cl_uint) image->columns,
   4371       (cl_uint) image->rows,tempImageBuffer,(cl_uint) resizedColumns,
   4372       (cl_uint) image->rows,resizeFilter,cubicCoefficientsBuffer,xFactor,
   4373       exception);
   4374     if (outputReady == MagickFalse)
   4375       goto cleanup;
   4376 
   4377     outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
   4378       tempImageBuffer,number_channels,(cl_uint) resizedColumns,
   4379       (cl_uint) image->rows,filteredImageBuffer,(cl_uint) resizedColumns,
   4380       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
   4381       exception);
   4382     if (outputReady == MagickFalse)
   4383       goto cleanup;
   4384   }
   4385   else
   4386   {
   4387     length=image->columns*resizedRows*number_channels;
   4388     tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
   4389       sizeof(CLQuantum),(void *) NULL);
   4390     if (tempImageBuffer == (cl_mem) NULL)
   4391     {
   4392       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4393         ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
   4394       goto cleanup;
   4395     }
   4396 
   4397     outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
   4398       imageBuffer,number_channels,(cl_uint) image->columns,
   4399       (cl_int) image->rows,tempImageBuffer,(cl_uint) image->columns,
   4400       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
   4401       exception);
   4402     if (outputReady == MagickFalse)
   4403       goto cleanup;
   4404 
   4405     outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
   4406       tempImageBuffer,number_channels,(cl_uint) image->columns,
   4407       (cl_uint) resizedRows,filteredImageBuffer,(cl_uint) resizedColumns,
   4408       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,xFactor,
   4409       exception);
   4410     if (outputReady == MagickFalse)
   4411       goto cleanup;
   4412   }
   4413 
   4414 cleanup:
   4415 
   4416   if (imageBuffer != (cl_mem) NULL)
   4417     ReleaseOpenCLMemObject(imageBuffer);
   4418   if (filteredImageBuffer != (cl_mem) NULL)
   4419     ReleaseOpenCLMemObject(filteredImageBuffer);
   4420   if (tempImageBuffer != (cl_mem) NULL)
   4421     ReleaseOpenCLMemObject(tempImageBuffer);
   4422   if (cubicCoefficientsBuffer != (cl_mem) NULL)
   4423     ReleaseOpenCLMemObject(cubicCoefficientsBuffer);
   4424   if (queue != (cl_command_queue) NULL)
   4425     ReleaseOpenCLCommandQueue(device,queue);
   4426   if (device != (MagickCLDevice) NULL)
   4427     ReleaseOpenCLDevice(device);
   4428   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
   4429     filteredImage=DestroyImage(filteredImage);
   4430 
   4431   return(filteredImage);
   4432 }
   4433 
   4434 static MagickBooleanType gpuSupportedResizeWeighting(
   4435   ResizeWeightingFunctionType f)
   4436 {
   4437   unsigned int
   4438     i;
   4439 
   4440   for (i = 0; ;i++)
   4441   {
   4442     if (supportedResizeWeighting[i] == LastWeightingFunction)
   4443       break;
   4444     if (supportedResizeWeighting[i] == f)
   4445       return(MagickTrue);
   4446   }
   4447   return(MagickFalse);
   4448 }
   4449 
   4450 MagickPrivate Image *AccelerateResizeImage(const Image *image,
   4451   const size_t resizedColumns,const size_t resizedRows,
   4452   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
   4453 {
   4454   Image
   4455     *filteredImage;
   4456 
   4457   MagickCLEnv
   4458     clEnv;
   4459 
   4460   assert(image != NULL);
   4461   assert(exception != (ExceptionInfo *) NULL);
   4462 
   4463   if (checkAccelerateCondition(image) == MagickFalse)
   4464     return((Image *) NULL);
   4465 
   4466   if ((gpuSupportedResizeWeighting(GetResizeFilterWeightingType(
   4467          resizeFilter)) == MagickFalse) ||
   4468       (gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(
   4469          resizeFilter)) == MagickFalse))
   4470     return((Image *) NULL);
   4471 
   4472   clEnv=getOpenCLEnvironment(exception);
   4473   if (clEnv == (MagickCLEnv) NULL)
   4474     return((Image *) NULL);
   4475 
   4476   filteredImage=ComputeResizeImage(image,clEnv,resizedColumns,resizedRows,
   4477     resizeFilter,exception);
   4478   return(filteredImage);
   4479 }
   4480 
   4481 /*
   4482 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   4483 %                                                                             %
   4484 %                                                                             %
   4485 %                                                                             %
   4486 %     A c c e l e r a t e R o t a t i o n a l B l u r I m a g e               %
   4487 %                                                                             %
   4488 %                                                                             %
   4489 %                                                                             %
   4490 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   4491 */
   4492 
   4493 static Image* ComputeRotationalBlurImage(const Image *image,MagickCLEnv clEnv,
   4494   const double angle,ExceptionInfo *exception)
   4495 {
   4496   cl_command_queue
   4497     queue;
   4498 
   4499   cl_float2
   4500     blurCenter;
   4501 
   4502   cl_int
   4503     status;
   4504 
   4505   cl_mem
   4506     cosThetaBuffer,
   4507     filteredImageBuffer,
   4508     imageBuffer,
   4509     sinThetaBuffer;
   4510 
   4511   cl_kernel
   4512     rotationalBlurKernel;
   4513 
   4514   cl_uint
   4515     cossin_theta_size,
   4516     number_channels;
   4517 
   4518   float
   4519     blurRadius,
   4520     *cosThetaPtr,
   4521     offset,
   4522     *sinThetaPtr,
   4523     theta;
   4524 
   4525   Image
   4526     *filteredImage;
   4527 
   4528   MagickBooleanType
   4529     outputReady;
   4530 
   4531   MagickCLDevice
   4532     device;
   4533 
   4534   size_t
   4535     gsize[2],
   4536     i;
   4537 
   4538   filteredImage=NULL;
   4539   imageBuffer=NULL;
   4540   filteredImageBuffer=NULL;
   4541   sinThetaBuffer=NULL;
   4542   cosThetaBuffer=NULL;
   4543   rotationalBlurKernel=NULL;
   4544   outputReady=MagickFalse;
   4545 
   4546   device=RequestOpenCLDevice(clEnv);
   4547   queue=AcquireOpenCLCommandQueue(device);
   4548   filteredImage=cloneImage(image,exception);
   4549   if (filteredImage == (Image *) NULL)
   4550     goto cleanup;
   4551   if (filteredImage->number_channels != image->number_channels)
   4552     goto cleanup;
   4553   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   4554   if (imageBuffer == (cl_mem) NULL)
   4555     goto cleanup;
   4556   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
   4557   if (filteredImageBuffer == (cl_mem) NULL)
   4558     goto cleanup;
   4559 
   4560   blurCenter.x=(float) (image->columns-1)/2.0;
   4561   blurCenter.y=(float) (image->rows-1)/2.0;
   4562   blurRadius=hypot(blurCenter.x,blurCenter.y);
   4563   cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt(
   4564     (double) blurRadius)+2UL);
   4565 
   4566   cosThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
   4567   if (cosThetaPtr == (float *) NULL)
   4568     goto cleanup;
   4569   sinThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
   4570   if (sinThetaPtr == (float *) NULL)
   4571   {
   4572     cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
   4573     goto cleanup;
   4574   }
   4575 
   4576   theta=DegreesToRadians(angle)/(double) (cossin_theta_size-1);
   4577   offset=theta*(float) (cossin_theta_size-1)/2.0;
   4578   for (i=0; i < (ssize_t) cossin_theta_size; i++)
   4579   {
   4580     cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
   4581     sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
   4582   }
   4583 
   4584   sinThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
   4585     CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),sinThetaPtr);
   4586   sinThetaPtr=RelinquishMagickMemory(sinThetaPtr);
   4587   cosThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
   4588     CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),cosThetaPtr);
   4589   cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
   4590   if ((sinThetaBuffer == (cl_mem) NULL) || (cosThetaBuffer == (cl_mem) NULL))
   4591   {
   4592     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4593       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
   4594     goto cleanup;
   4595   }
   4596 
   4597   rotationalBlurKernel=AcquireOpenCLKernel(device,"RotationalBlur");
   4598   if (rotationalBlurKernel == (cl_kernel) NULL)
   4599   {
   4600     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4601       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   4602     goto cleanup;
   4603   }
   4604 
   4605   number_channels=(cl_uint) image->number_channels;
   4606 
   4607   i=0;
   4608   status =SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   4609   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint),&number_channels);
   4610   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
   4611   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
   4612   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
   4613   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
   4614   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint), &cossin_theta_size);
   4615   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   4616   if (status != CL_SUCCESS)
   4617   {
   4618     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4619       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
   4620     goto cleanup;
   4621   }
   4622 
   4623   gsize[0]=image->columns;
   4624   gsize[1]=image->rows;
   4625   outputReady=EnqueueOpenCLKernel(queue,rotationalBlurKernel,2,
   4626     (const size_t *) NULL,gsize,(const size_t *) NULL,image,filteredImage,
   4627     MagickFalse,exception);
   4628 
   4629 cleanup:
   4630 
   4631   if (imageBuffer != (cl_mem) NULL)
   4632     ReleaseOpenCLMemObject(imageBuffer);
   4633   if (filteredImageBuffer != (cl_mem) NULL)
   4634     ReleaseOpenCLMemObject(filteredImageBuffer);
   4635   if (sinThetaBuffer != (cl_mem) NULL)
   4636     ReleaseOpenCLMemObject(sinThetaBuffer);
   4637   if (cosThetaBuffer != (cl_mem) NULL)
   4638     ReleaseOpenCLMemObject(cosThetaBuffer);
   4639   if (rotationalBlurKernel != (cl_kernel) NULL)
   4640     ReleaseOpenCLKernel(rotationalBlurKernel);
   4641   if (queue != (cl_command_queue) NULL)
   4642     ReleaseOpenCLCommandQueue(device,queue);
   4643   if (device != (MagickCLDevice) NULL)
   4644     ReleaseOpenCLDevice(device);
   4645   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
   4646     filteredImage=DestroyImage(filteredImage);
   4647 
   4648   return(filteredImage);
   4649 }
   4650 
   4651 MagickPrivate Image* AccelerateRotationalBlurImage(const Image *image,
   4652   const double angle,ExceptionInfo *exception)
   4653 {
   4654   Image
   4655     *filteredImage;
   4656 
   4657   MagickCLEnv
   4658     clEnv;
   4659 
   4660   assert(image != NULL);
   4661   assert(exception != (ExceptionInfo *) NULL);
   4662 
   4663   if (checkAccelerateCondition(image) == MagickFalse)
   4664     return((Image *) NULL);
   4665 
   4666   clEnv=getOpenCLEnvironment(exception);
   4667   if (clEnv == (MagickCLEnv) NULL)
   4668     return((Image *) NULL);
   4669 
   4670   filteredImage=ComputeRotationalBlurImage(image,clEnv,angle,exception);
   4671   return filteredImage;
   4672 }
   4673 
   4674 /*
   4675 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   4676 %                                                                             %
   4677 %                                                                             %
   4678 %                                                                             %
   4679 %     A c c e l e r a t e U n s h a r p M a s k I m a g e                     %
   4680 %                                                                             %
   4681 %                                                                             %
   4682 %                                                                             %
   4683 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   4684 */
   4685 
   4686 static Image *ComputeUnsharpMaskImage(const Image *image,MagickCLEnv clEnv,
   4687   const double radius,const double sigma,const double gain,
   4688   const double threshold,ExceptionInfo *exception)
   4689 {
   4690   cl_command_queue
   4691     queue;
   4692 
   4693   cl_int
   4694     status;
   4695 
   4696   cl_kernel
   4697     blurRowKernel,
   4698     unsharpMaskBlurColumnKernel;
   4699 
   4700   cl_mem
   4701     filteredImageBuffer,
   4702     imageBuffer,
   4703     imageKernelBuffer,
   4704     tempImageBuffer;
   4705 
   4706   cl_uint
   4707     imageColumns,
   4708     imageRows,
   4709     kernelWidth,
   4710     number_channels;
   4711 
   4712   float
   4713     fGain,
   4714     fThreshold;
   4715 
   4716   Image
   4717     *filteredImage;
   4718 
   4719   int
   4720     chunkSize;
   4721 
   4722   MagickBooleanType
   4723     outputReady;
   4724 
   4725   MagickCLDevice
   4726     device;
   4727 
   4728   MagickSizeType
   4729     length;
   4730 
   4731   size_t
   4732     gsize[2],
   4733     i,
   4734     lsize[2];
   4735 
   4736   filteredImage=NULL;
   4737   imageBuffer=NULL;
   4738   filteredImageBuffer=NULL;
   4739   tempImageBuffer=NULL;
   4740   imageKernelBuffer=NULL;
   4741   blurRowKernel=NULL;
   4742   unsharpMaskBlurColumnKernel=NULL;
   4743   outputReady=MagickFalse;
   4744 
   4745   device=RequestOpenCLDevice(clEnv);
   4746   queue=AcquireOpenCLCommandQueue(device);
   4747   filteredImage=cloneImage(image,exception);
   4748   if (filteredImage == (Image *) NULL)
   4749     goto cleanup;
   4750   if (filteredImage->number_channels != image->number_channels)
   4751     goto cleanup;
   4752   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   4753   if (imageBuffer == (cl_mem) NULL)
   4754     goto cleanup;
   4755   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
   4756   if (filteredImageBuffer == (cl_mem) NULL)
   4757     goto cleanup;
   4758 
   4759   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
   4760     exception);
   4761 
   4762   length=image->columns*image->rows;
   4763   tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
   4764     sizeof(cl_float4),NULL);
   4765   if (tempImageBuffer == (cl_mem) NULL)
   4766   {
   4767     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4768       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
   4769     goto cleanup;
   4770   }
   4771 
   4772   blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
   4773   if (blurRowKernel == (cl_kernel) NULL)
   4774   {
   4775     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4776       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   4777     goto cleanup;
   4778   }
   4779 
   4780   unsharpMaskBlurColumnKernel=AcquireOpenCLKernel(device,
   4781     "UnsharpMaskBlurColumn");
   4782   if (unsharpMaskBlurColumnKernel == (cl_kernel) NULL)
   4783   {
   4784     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4785       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   4786     goto cleanup;
   4787   }
   4788 
   4789   number_channels=(cl_uint) image->number_channels;
   4790   imageColumns=(cl_uint) image->columns;
   4791   imageRows=(cl_uint) image->rows;
   4792 
   4793   chunkSize = 256;
   4794 
   4795   i=0;
   4796   status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   4797   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
   4798   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
   4799   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
   4800   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
   4801   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
   4802   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
   4803   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
   4804   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
   4805   if (status != CL_SUCCESS)
   4806   {
   4807     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4808       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
   4809     goto cleanup;
   4810   }
   4811 
   4812   gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
   4813   gsize[1]=image->rows;
   4814   lsize[0]=chunkSize;
   4815   lsize[1]=1;
   4816   outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,
   4817     (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
   4818     exception);
   4819 
   4820   chunkSize=256;
   4821   fGain=(float) gain;
   4822   fThreshold=(float) threshold;
   4823 
   4824   i=0;
   4825   status =SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   4826   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
   4827   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),&number_channels);
   4828   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
   4829   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
   4830   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
   4831   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,(chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
   4832   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,kernelWidth*sizeof(float),NULL);
   4833   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
   4834   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
   4835   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
   4836   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
   4837   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   4838   if (status != CL_SUCCESS)
   4839   {
   4840     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4841       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
   4842     goto cleanup;
   4843   }
   4844 
   4845   gsize[0]=image->columns;
   4846   gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
   4847   lsize[0]=1;
   4848   lsize[1]=chunkSize;
   4849   outputReady=EnqueueOpenCLKernel(queue,unsharpMaskBlurColumnKernel,2,
   4850     (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
   4851     exception);
   4852 
   4853 cleanup:
   4854 
   4855   if (imageBuffer != (cl_mem) NULL)
   4856     ReleaseOpenCLMemObject(imageBuffer);
   4857   if (filteredImageBuffer != (cl_mem) NULL)
   4858     ReleaseOpenCLMemObject(filteredImageBuffer);
   4859   if (tempImageBuffer != (cl_mem) NULL)
   4860     ReleaseOpenCLMemObject(tempImageBuffer);
   4861   if (imageKernelBuffer != (cl_mem) NULL)
   4862     ReleaseOpenCLMemObject(imageKernelBuffer);
   4863   if (blurRowKernel != (cl_kernel) NULL)
   4864     ReleaseOpenCLKernel(blurRowKernel);
   4865   if (unsharpMaskBlurColumnKernel != (cl_kernel) NULL)
   4866     ReleaseOpenCLKernel(unsharpMaskBlurColumnKernel);
   4867   if (queue != (cl_command_queue) NULL)
   4868     ReleaseOpenCLCommandQueue(device,queue);
   4869   if (device != (MagickCLDevice) NULL)
   4870     ReleaseOpenCLDevice(device);
   4871   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
   4872     filteredImage=DestroyImage(filteredImage);
   4873 
   4874   return(filteredImage);
   4875 }
   4876 
   4877 static Image *ComputeUnsharpMaskImageSingle(const Image *image,
   4878   MagickCLEnv clEnv,const double radius,const double sigma,const double gain,
   4879   const double threshold,ExceptionInfo *exception)
   4880 {
   4881   cl_command_queue
   4882     queue;
   4883 
   4884   cl_int
   4885     status;
   4886 
   4887   cl_kernel
   4888     unsharpMaskKernel;
   4889 
   4890   cl_mem
   4891     filteredImageBuffer,
   4892     imageBuffer,
   4893     imageKernelBuffer;
   4894 
   4895   cl_uint
   4896     imageColumns,
   4897     imageRows,
   4898     kernelWidth,
   4899     number_channels;
   4900 
   4901   float
   4902     fGain,
   4903     fThreshold;
   4904 
   4905   Image
   4906     *filteredImage;
   4907 
   4908   MagickBooleanType
   4909     outputReady;
   4910 
   4911   MagickCLDevice
   4912     device;
   4913 
   4914   size_t
   4915     gsize[2],
   4916     i,
   4917     lsize[2];
   4918 
   4919   filteredImage=NULL;
   4920   imageBuffer=NULL;
   4921   filteredImageBuffer=NULL;
   4922   imageKernelBuffer=NULL;
   4923   unsharpMaskKernel=NULL;
   4924   outputReady=MagickFalse;
   4925 
   4926   device=RequestOpenCLDevice(clEnv);
   4927   queue=AcquireOpenCLCommandQueue(device);
   4928   filteredImage=cloneImage(image,exception);
   4929   if (filteredImage == (Image *) NULL)
   4930     goto cleanup;
   4931   if (filteredImage->number_channels != image->number_channels)
   4932     goto cleanup;
   4933   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   4934   if (imageBuffer == (cl_mem) NULL)
   4935     goto cleanup;
   4936   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
   4937   if (filteredImageBuffer == (cl_mem) NULL)
   4938     goto cleanup;
   4939 
   4940   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
   4941     exception);
   4942 
   4943   unsharpMaskKernel=AcquireOpenCLKernel(device,"UnsharpMask");
   4944   if (unsharpMaskKernel == NULL)
   4945   {
   4946     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4947       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   4948     goto cleanup;
   4949   }
   4950 
   4951   imageColumns=(cl_uint) image->columns;
   4952   imageRows=(cl_uint) image->rows;
   4953   number_channels=(cl_uint) image->number_channels;
   4954   fGain=(float) gain;
   4955   fThreshold=(float) threshold;
   4956 
   4957   i=0;
   4958   status =SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   4959   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&number_channels);
   4960   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
   4961   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
   4962   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
   4963   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
   4964   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageRows);
   4965   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernelWidth)),(void *) NULL);
   4966   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
   4967   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
   4968   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   4969   if (status != CL_SUCCESS)
   4970   {
   4971     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   4972       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
   4973     goto cleanup;
   4974   }
   4975 
   4976   gsize[0]=((image->columns + 7) / 8)*8;
   4977   gsize[1]=((image->rows + 31) / 32)*32;
   4978   lsize[0]=8;
   4979   lsize[1]=32;
   4980   outputReady=EnqueueOpenCLKernel(queue,unsharpMaskKernel,2,(const size_t *) NULL,
   4981     gsize,lsize,image,filteredImage,MagickFalse,exception);
   4982 
   4983 cleanup:
   4984 
   4985   if (imageBuffer != (cl_mem) NULL)
   4986     ReleaseOpenCLMemObject(imageBuffer);
   4987   if (filteredImageBuffer != (cl_mem) NULL)
   4988     ReleaseOpenCLMemObject(filteredImageBuffer);
   4989   if (imageKernelBuffer != (cl_mem) NULL)
   4990     ReleaseOpenCLMemObject(imageKernelBuffer);
   4991   if (unsharpMaskKernel != (cl_kernel) NULL)
   4992     ReleaseOpenCLKernel(unsharpMaskKernel);
   4993   if (queue != (cl_command_queue) NULL)
   4994     ReleaseOpenCLCommandQueue(device,queue);
   4995   if (device != (MagickCLDevice) NULL)
   4996     ReleaseOpenCLDevice(device);
   4997   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
   4998     filteredImage=DestroyImage(filteredImage);
   4999 
   5000   return(filteredImage);
   5001 }
   5002 
   5003 MagickPrivate Image *AccelerateUnsharpMaskImage(const Image *image,
   5004   const double radius,const double sigma,const double gain,
   5005   const double threshold,ExceptionInfo *exception)
   5006 {
   5007   Image
   5008     *filteredImage;
   5009 
   5010   MagickCLEnv
   5011     clEnv;
   5012 
   5013   assert(image != NULL);
   5014   assert(exception != (ExceptionInfo *) NULL);
   5015 
   5016   if (checkAccelerateCondition(image) == MagickFalse)
   5017     return((Image *) NULL);
   5018 
   5019   clEnv=getOpenCLEnvironment(exception);
   5020   if (clEnv == (MagickCLEnv) NULL)
   5021     return((Image *) NULL);
   5022 
   5023   if (radius < 12.1)
   5024     filteredImage=ComputeUnsharpMaskImageSingle(image,clEnv,radius,sigma,gain,
   5025       threshold,exception);
   5026   else
   5027     filteredImage=ComputeUnsharpMaskImage(image,clEnv,radius,sigma,gain,
   5028       threshold,exception);
   5029   return(filteredImage);
   5030 }
   5031 
   5032 static Image *ComputeWaveletDenoiseImage(const Image *image,MagickCLEnv clEnv,
   5033   const double threshold,ExceptionInfo *exception)
   5034 {
   5035   cl_command_queue
   5036     queue;
   5037 
   5038   const cl_int
   5039     PASSES=5;
   5040 
   5041   const int
   5042     TILESIZE=64,
   5043     PAD=1<<(PASSES-1),
   5044     SIZE=TILESIZE-2*PAD;
   5045 
   5046   cl_float
   5047     thresh;
   5048 
   5049   cl_int
   5050     status;
   5051 
   5052   cl_kernel
   5053     denoiseKernel;
   5054 
   5055   cl_mem
   5056     filteredImageBuffer,
   5057     imageBuffer;
   5058 
   5059   cl_uint
   5060     number_channels,
   5061     width,
   5062     height,
   5063     max_channels;
   5064 
   5065   Image
   5066     *filteredImage;
   5067 
   5068   MagickBooleanType
   5069     outputReady;
   5070 
   5071   MagickCLDevice
   5072     device;
   5073 
   5074   size_t
   5075     goffset[2],
   5076     gsize[2],
   5077     i,
   5078     lsize[2],
   5079     passes,
   5080     x;
   5081 
   5082   filteredImage=NULL;
   5083   imageBuffer=NULL;
   5084   filteredImageBuffer=NULL;
   5085   denoiseKernel=NULL;
   5086   queue=NULL;
   5087   outputReady=MagickFalse;
   5088 
   5089   device=RequestOpenCLDevice(clEnv);
   5090   /* Work around an issue on low end Intel devices */
   5091   if (strcmp("Intel(R) HD Graphics",device->name) == 0)
   5092     goto cleanup;
   5093   queue=AcquireOpenCLCommandQueue(device);
   5094   filteredImage=CloneImage(image,0,0,MagickTrue,
   5095     exception);
   5096   if (filteredImage == (Image *) NULL)
   5097     goto cleanup;
   5098   if (filteredImage->number_channels != image->number_channels)
   5099     goto cleanup;
   5100   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
   5101   if (imageBuffer == (cl_mem) NULL)
   5102     goto cleanup;
   5103   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
   5104   if (filteredImageBuffer == (cl_mem) NULL)
   5105     goto cleanup;
   5106 
   5107   denoiseKernel=AcquireOpenCLKernel(device,"WaveletDenoise");
   5108   if (denoiseKernel == (cl_kernel) NULL)
   5109   {
   5110     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   5111       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
   5112     goto cleanup;
   5113   }
   5114 
   5115   number_channels=(cl_uint)image->number_channels;
   5116   width=(cl_uint)image->columns;
   5117   height=(cl_uint)image->rows;
   5118   max_channels=number_channels;
   5119   if ((max_channels == 4) || (max_channels == 2))
   5120     max_channels=max_channels-1;
   5121   thresh=threshold;
   5122   passes=(((1.0f*image->columns)*image->rows)+1999999.0f)/2000000.0f;
   5123   passes=(passes < 1) ? 1 : passes;
   5124 
   5125   i=0;
   5126   status =SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
   5127   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   5128   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
   5129   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&max_channels);
   5130   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_float),(void *)&thresh);
   5131   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_int),(void *)&PASSES);
   5132   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&width);
   5133   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&height);
   5134   if (status != CL_SUCCESS)
   5135     {
   5136       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
   5137         ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
   5138       goto cleanup;
   5139     }
   5140 
   5141   for (x = 0; x < passes; ++x)
   5142   {
   5143     gsize[0]=((width+(SIZE-1))/SIZE)*TILESIZE;
   5144     gsize[1]=((((height+(SIZE-1))/SIZE)+passes-1)/passes)*4;
   5145     lsize[0]=TILESIZE;
   5146     lsize[1]=4;
   5147     goffset[0]=0;
   5148     goffset[1]=x*gsize[1];
   5149 
   5150     outputReady=EnqueueOpenCLKernel(queue,denoiseKernel,2,goffset,gsize,lsize,
   5151       image,filteredImage,MagickTrue,exception);
   5152     if (outputReady == MagickFalse)
   5153       break;
   5154   }
   5155 
   5156 cleanup:
   5157 
   5158   if (imageBuffer != (cl_mem) NULL)
   5159     ReleaseOpenCLMemObject(imageBuffer);
   5160   if (filteredImageBuffer != (cl_mem) NULL)
   5161     ReleaseOpenCLMemObject(filteredImageBuffer);
   5162   if (denoiseKernel != (cl_kernel) NULL)
   5163     ReleaseOpenCLKernel(denoiseKernel);
   5164   if (queue != (cl_command_queue) NULL)
   5165     ReleaseOpenCLCommandQueue(device,queue);
   5166   if (device != (MagickCLDevice) NULL)
   5167     ReleaseOpenCLDevice(device);
   5168   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
   5169     filteredImage=DestroyImage(filteredImage);
   5170 
   5171   return(filteredImage);
   5172 }
   5173 
   5174 MagickPrivate Image *AccelerateWaveletDenoiseImage(const Image *image,
   5175   const double threshold,ExceptionInfo *exception)
   5176 {
   5177   Image
   5178     *filteredImage;
   5179 
   5180   MagickCLEnv
   5181     clEnv;
   5182 
   5183   assert(image != NULL);
   5184   assert(exception != (ExceptionInfo *)NULL);
   5185 
   5186   if (checkAccelerateCondition(image) == MagickFalse)
   5187     return((Image *) NULL);
   5188 
   5189   clEnv=getOpenCLEnvironment(exception);
   5190   if (clEnv == (MagickCLEnv) NULL)
   5191     return((Image *) NULL);
   5192 
   5193   filteredImage=ComputeWaveletDenoiseImage(image,clEnv,threshold,exception);
   5194 
   5195   return(filteredImage);
   5196 }
   5197 #endif /* MAGICKCORE_OPENCL_SUPPORT */
   5198