Code: Select all
/tmp/poclqGXVPp/program.cl:68:672: warning: implicit declaration of function 'async_work_group_strided_copy' is invalid in C99
__kernel void UnsharpMaskBlurColumn(const __global CLPixelType* inputImage, const __global float4 *blurRowData, __global CLPixelType *filtered_im, const unsigned int imageColumns, const unsigned int imageRows, __local float4* cachedData, __local float* cachedFilter, const ChannelType channel, const __global float *filter, const unsigned int width, const float gain, const float threshold) { const unsigned int radius = (width-1)/2; const int groupX = get_group_id(0); const int groupStartY = get_group_id(1)*get_local_size(1) - radius; const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius; if (groupStartY >= 0 && groupStopY < imageRows) { event_t e = async_work_group_strided_copy(cachedData ,blurRowData+groupStartY*imageColumns+groupX ,groupStopY-groupStartY,imageColumns,0); wait_group_events(1,&e); } else { for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) { cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX]; } barrier(CLK_LOCAL_MEM_FENCE); } event_t e = async_work_group_copy(cachedFilter,filter,width,0); wait_group_events(1,&e); const int cy = get_global_id(1); if (cy < imageRows) { float4 blurredPixel = (float4) 0.0f; int i = 0;
^
/tmp/poclqGXVPp/program.cl:68:668: error: initializing 'event_t' with an expression of incompatible type 'int'
__kernel void UnsharpMaskBlurColumn(const __global CLPixelType* inputImage, const __global float4 *blurRowData, __global CLPixelType *filtered_im, const unsigned int imageColumns, const unsigned int imageRows, __local float4* cachedData, __local float* cachedFilter, const ChannelType channel, const __global float *filter, const unsigned int width, const float gain, const float threshold) { const unsigned int radius = (width-1)/2; const int groupX = get_group_id(0); const int groupStartY = get_group_id(1)*get_local_size(1) - radius; const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius; if (groupStartY >= 0 && groupStopY < imageRows) { event_t e = async_work_group_strided_copy(cachedData ,blurRowData+groupStartY*imageColumns+groupX ,groupStopY-groupStartY,imageColumns,0); wait_group_events(1,&e); } else { for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) { cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX]; } barrier(CLK_LOCAL_MEM_FENCE); } event_t e = async_work_group_copy(cachedFilter,filter,width,0); wait_group_events(1,&e); const int cy = get_global_id(1); if (cy < imageRows) { float4 blurredPixel = (float4) 0.0f; int i = 0;
^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/tmp/poclqGXVPp/program.cl:76:1650: error: initializing 'event_t' with an expression of incompatible type 'int'
for (int j=0; j < UFACTOR; j++, i++) { blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)]; } } for ( ; i < width; i++) { blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)]; } blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y) ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w))); float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]); float4 outputPixel = inputImagePixel - blurredPixel; float quantumThreshold = QuantumRange*threshold; int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold); outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask); filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y) ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w)); } } __kernel void UnsharpMaskBlurColumnSection(const __global CLPixelType* inputImage, const __global float4 *blurRowData, __global CLPixelType *filtered_im, const unsigned int imageColumns, const unsigned int imageRows, __local float4* cachedData, __local float* cachedFilter, const ChannelType channel, const __global float *filter, const unsigned int width, const float gain, const float threshold, const unsigned int offsetRows, const unsigned int section) { const unsigned int radius = (width-1)/2; const int groupX = get_group_id(0); const int groupStartY = get_group_id(1)*get_local_size(1) - radius; const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius; blurRowData += imageColumns * radius * section; if (groupStartY >= 0 && groupStopY < imageRows) { event_t e = async_work_group_strided_copy(cachedData ,blurRowData+groupStartY*imageColumns+groupX ,groupStopY-groupStartY,imageColumns,0); wait_group_events(1,&e); } else { for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) { int pos = ClampToCanvasWithHalo(groupStartY+i,imageRows, radius, section)*imageColumns+ groupX; 
cachedData[i] = *(blurRowData + pos); } barrier(CLK_LOCAL_MEM_FENCE); } event_t e = async_work_group_copy(cachedFilter,filter,width,0); wait_group_events(1,&e); const int cy = get_global_id(1); if (cy < imageRows) { float4 blurredPixel = (float4) 0.0f; int i = 0;
^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/tmp/poclqGXVPp/program.cl:113:1406: error: initializing 'event_t' with an expression of incompatible type 'int'
1 warning and 3 errors generated.
convert: clBuildProgram failed. (-11) @ warning/opencl.c/CompileOpenCLKernels/898.
convert: clCreateCommandQueue failed. (0) @ warning/opencl.c/InitOpenCLEnvInternal/1211.
Code: Select all
/*
  ChannelType: bit flags naming the pixel channels an operation acts on.
  Several enumerators are aliases that share one bit (for example Red, Gray
  and Cyan are all 0x0001).  DefaultChannels is every bit of AllChannels plus
  SyncChannels, with the OpacityChannel bit cleared.
*/
typedef enum
{
  UndefinedChannel,
  RedChannel = 0x0001,
  GrayChannel = 0x0001,
  CyanChannel = 0x0001,
  GreenChannel = 0x0002,
  MagentaChannel = 0x0002,
  BlueChannel = 0x0004,
  YellowChannel = 0x0004,
  AlphaChannel = 0x0008,
  OpacityChannel = 0x0008,
  MatteChannel = 0x0008,
  BlackChannel = 0x0020,
  IndexChannel = 0x0020,
  CompositeChannels = 0x002F,
  AllChannels = 0x7ffffff,
  TrueAlphaChannel = 0x0040,
  RGBChannels = 0x0080,
  GrayChannels = 0x0080,
  SyncChannels = 0x0100,
  DefaultChannels = ((AllChannels | SyncChannels) &~ OpacityChannel)
} ChannelType;
/*
  ScaleCharToQuantum: widen an 8-bit value to the quantum type selected at
  build time through MAGICKCORE_QUANTUM_DEPTH.  The multipliers are
  65535/255 = 257 and 4294967295/255 = 16843009.  No definition is emitted
  for any other depth.
*/
#if (MAGICKCORE_QUANTUM_DEPTH == 8)
inline CLQuantum ScaleCharToQuantum(const unsigned char value)
{
  const CLQuantum quantum = (CLQuantum) value;
  return quantum;
}
#elif (MAGICKCORE_QUANTUM_DEPTH == 16)
inline CLQuantum ScaleCharToQuantum(const unsigned char value)
{
  const CLQuantum quantum = (CLQuantum) (257.0f*value);
  return quantum;
}
#elif (MAGICKCORE_QUANTUM_DEPTH == 32)
inline CLQuantum ScaleCharToQuantum(const unsigned char value)
{
  const CLQuantum quantum = (CLQuantum) (16843009.0*value);
  return quantum;
}
#endif
/*
  ClampToCanvas: limit a coordinate to the closed interval [0, range-1].

  offset: coordinate to limit.
  range:  extent of the axis; the largest value returned is range-1.
*/
inline int ClampToCanvas(const int offset,const int range)
{
  const int lowest = (int)0;
  const int highest = range-1;

  return clamp(offset, lowest, highest);
}
/*
  ClampToCanvasWithHalo: limit a coordinate to an interval that is widened by
  `edge` on one side, chosen by `section`.

  section == 0: result lies in [0, range-1+edge]
  section != 0: result lies in [-edge, range-1]
*/
inline int ClampToCanvasWithHalo(const int offset,const int range, const int edge, const int section)
{
  int lowest;
  int highest;

  if (section)
  {
    lowest = (int)(0-edge);
    highest = range-1;
  }
  else
  {
    lowest = (int)0;
    highest = range-1+edge;
  }
  return clamp(offset, lowest, highest);
}
/*
  ClampToQuantum: limit a float to [0, QuantumRange], add 0.5 and convert
  the sum to CLQuantum.
*/
inline CLQuantum ClampToQuantum(const float value)
{
  const float limited = clamp(value, 0.0f, (float) QuantumRange);

  return (CLQuantum) (limited + 0.5f);
}
/*
  ScaleQuantumToMap: turn a quantum value into a lookup-table index.
  Values at or above MaxMap yield MaxMap; smaller values are converted to
  uint unchanged.
*/
inline uint ScaleQuantumToMap(CLQuantum value)
{
  return (value >= (CLQuantum) MaxMap) ? ((uint)MaxMap) : ((uint)value);
}
/*
  PerceptibleReciprocal: reciprocal of x that never divides by a magnitude
  smaller than MagickEpsilon.  When |x| is below MagickEpsilon the result is
  1/MagickEpsilon carrying the sign of x (x == 0 counts as positive).
*/
inline float PerceptibleReciprocal(const float x)
{
  float sign = (float) 1.0;

  if (x < (float) 0.0)
    sign = (float) -1.0;

  if ((sign*x) >= MagickEpsilon)
    return (float) 1.0/x;
  return sign*((float) 1.0/MagickEpsilon);
}
/* GetPixelAlpha: QuantumRange minus the .w component of `pixel`. */
#define GetPixelAlpha(pixel) (QuantumRange-(pixel).w)
/*
  PixelIntensityMethod: selects the formula GetPixelIntensity() uses to
  reduce a pixel to a single value.  Values are sequential from 0.
*/
typedef enum
{
  UndefinedPixelIntensityMethod = 0,
  AveragePixelIntensityMethod,
  BrightnessPixelIntensityMethod,
  LightnessPixelIntensityMethod,
  Rec601LumaPixelIntensityMethod,
  Rec601LuminancePixelIntensityMethod,
  Rec709LumaPixelIntensityMethod,
  Rec709LuminancePixelIntensityMethod,
  RMSPixelIntensityMethod,
  MSPixelIntensityMethod
} PixelIntensityMethod;
/*
  ColorspaceType: colorspace identifiers, numbered sequentially from 0.
  The order of the enumerators fixes their values and must not change.
*/
typedef enum
{
  UndefinedColorspace,
  RGBColorspace,
  GRAYColorspace,
  TransparentColorspace,
  OHTAColorspace,
  LabColorspace,
  XYZColorspace,
  YCbCrColorspace,
  YCCColorspace,
  YIQColorspace,
  YPbPrColorspace,
  YUVColorspace,
  CMYKColorspace,
  sRGBColorspace,
  HSBColorspace,
  HSLColorspace,
  HWBColorspace,
  Rec601LumaColorspace,
  Rec601YCbCrColorspace,
  Rec709LumaColorspace,
  Rec709YCbCrColorspace,
  LogColorspace,
  CMYColorspace,
  LuvColorspace,
  HCLColorspace,
  LCHColorspace,
  LMSColorspace,
  LCHabColorspace,
  LCHuvColorspace,
  scRGBColorspace,
  HSIColorspace,
  HSVColorspace,
  HCLpColorspace,
  YDbDrColorspace
} ColorspaceType;
/* RoundToUnity: limit `value` to the closed interval [0, 1]. */
inline float RoundToUnity(const float value)
{
  const float floorValue = 0.0f;
  const float ceilingValue = 1.0f;

  return clamp(value,floorValue,ceilingValue);
}
/*
  Channel accessors.  Component layout used throughout this file:
  .x = blue, .y = green, .z = red, .w = opacity.  Each channel has a
  getter/setter for CLPixelType and an "F4" pair for float4.
*/
inline CLQuantum getBlue(CLPixelType p)
{
  return p.x;
}

inline void setBlue(CLPixelType* p, CLQuantum value)
{
  (*p).x = value;
}

inline float getBlueF4(float4 p)
{
  return p.x;
}

inline void setBlueF4(float4* p, float value)
{
  (*p).x = value;
}

inline CLQuantum getGreen(CLPixelType p)
{
  return p.y;
}

inline void setGreen(CLPixelType* p, CLQuantum value)
{
  (*p).y = value;
}

inline float getGreenF4(float4 p)
{
  return p.y;
}

inline void setGreenF4(float4* p, float value)
{
  (*p).y = value;
}

inline CLQuantum getRed(CLPixelType p)
{
  return p.z;
}

inline void setRed(CLPixelType* p, CLQuantum value)
{
  (*p).z = value;
}

inline float getRedF4(float4 p)
{
  return p.z;
}

inline void setRedF4(float4* p, float value)
{
  (*p).z = value;
}

inline CLQuantum getOpacity(CLPixelType p)
{
  return p.w;
}

inline void setOpacity(CLPixelType* p, CLQuantum value)
{
  (*p).w = value;
}

inline float getOpacityF4(float4 p)
{
  return p.w;
}

inline void setOpacityF4(float4* p, float value)
{
  (*p).w = value;
}

/* setGray: store the same value in the red, green and blue components. */
inline void setGray(CLPixelType* p, CLQuantum value)
{
  (*p).z = value;
  (*p).y = value;
  (*p).x = value;
}

/*
  GetPixelIntensity: reduce a pixel to one float.

  method:     a PixelIntensityMethod value; any value without its own case
              (including UndefinedPixelIntensityMethod) uses the Rec709
              weights.
  colorspace: when equal to GRAYColorspace the red component is returned
              directly and `method` is ignored.
  p:          the pixel to measure; opacity is not used.
*/
inline float GetPixelIntensity(const int method, const int colorspace, CLPixelType p)
{
  float r = getRed(p);
  float g = getGreen(p);
  float b = getBlue(p);

  if (colorspace == GRAYColorspace)
    return r;

  switch (method)
  {
    case AveragePixelIntensityMethod:
      return (r+g+b)/3.0;

    case BrightnessPixelIntensityMethod:
      return max(max(r,g),b);

    case LightnessPixelIntensityMethod:
      return (min(min(r,g),b)+ max(max(r,g),b))/2.0;

    case MSPixelIntensityMethod:
      return (float) (((float) r*r+g*g+b*b)/ (3.0*QuantumRange));

    /* Both Rec601 methods use the same weights here. */
    case Rec601LumaPixelIntensityMethod:
    case Rec601LuminancePixelIntensityMethod:
      return 0.298839*r+0.586811*g+0.114350*b;

    case RMSPixelIntensityMethod:
      return (float) (sqrt((float) r*r+g*g+b*b)/ sqrt(3.0));

    /* Both Rec709 methods, and every unlisted method, use these weights. */
    case Rec709LumaPixelIntensityMethod:
    case Rec709LuminancePixelIntensityMethod:
    default:
      return 0.212656*r+0.715158*g+0.072186*b;
  }
}
/*
  ConvolveOptimized: 2-D convolution that first stages the work-group's
  input tile (plus a filter-sized border) and the filter taps in __local
  memory.

  input, output:   source and destination images, imageWidth pixels per row.
  filter:          filterWidth*filterHeight taps, read row by row.
  matte, channel:  when channel has no OpacityChannel bit, or matte is 0,
                   every component is a plain weighted sum.  Otherwise the
                   .x/.y/.z taps are additionally weighted by
                   QuantumScale*(QuantumRange - p.w) and the sums are
                   normalised by the accumulated weight.
  pixelLocalCache: __local scratch for
                   (local_size(0)+filterWidth-1) * (local_size(1)+filterHeight-1)
                   pixels.
  filterCache:     __local scratch for filterWidth*filterHeight floats.

  Source coordinates outside the image are clamped with ClampToCanvas.
  Output .w is the filtered value only when channel has the OpacityChannel
  bit; otherwise the input pixel's .w is copied.
*/
__kernel void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
  const unsigned int imageWidth, const unsigned int imageHeight,
  __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
  const uint matte, const ChannelType channel,
  __local CLPixelType *pixelLocalCache, __local float* filterCache)
{
  int2 groupId;
  groupId.x = get_group_id(0);
  groupId.y = get_group_id(1);

  /* Image coordinate covered by the work-group's first work-item. */
  int2 tileOrigin;
  tileOrigin.x = groupId.x * get_local_size(0);
  tileOrigin.y = groupId.y * get_local_size(1);

  int2 halfFilter;
  halfFilter.x = (filterWidth-1)/2;
  halfFilter.y = (filterHeight-1)/2;

  /* The cached area is the tile grown by the filter footprint. */
  int2 cacheOrigin = tileOrigin - halfFilter;
  int2 cacheSize;
  cacheSize.x = get_local_size(0) + filterWidth - 1;
  cacheSize.y = get_local_size(1) + filterHeight - 1;

  int flatLocalId = get_local_id(1)*get_local_size(0)+get_local_id(0);
  int cachePixelCount = cacheSize.x * cacheSize.y;
  int workGroupSize = get_local_size(0) * get_local_size(1);

  /* Work-items take cache slots in turn, workGroupSize slots apart. */
  for (int slot = flatLocalId; slot < cachePixelCount; slot+=workGroupSize)
  {
    int2 offsetInCache;
    offsetInCache.x = slot % cacheSize.x;
    offsetInCache.y = slot / cacheSize.x;

    int2 src;
    src = cacheOrigin + offsetInCache;
    src.x = ClampToCanvas(src.x, imageWidth);
    src.y = ClampToCanvas(src.y, imageHeight);

    pixelLocalCache[slot] = input[src.y * imageWidth + src.x];
  }

  for (int tap = flatLocalId; tap < filterHeight*filterWidth; tap+=workGroupSize)
    filterCache[tap] = filter[tap];

  barrier(CLK_LOCAL_MEM_FENCE);

  int2 imageIndex;
  imageIndex.x = tileOrigin.x + get_local_id(0);
  imageIndex.y = tileOrigin.y + get_local_id(1);

  /* Work-items past the image edge helped fill the cache but write nothing. */
  if (imageIndex.x >= imageWidth || imageIndex.y >= imageHeight)
    return;

  int filterIndex = 0;
  float4 sum = (float4)0.0f;
  float gamma = 0.0f;

  if (((channel & OpacityChannel) == 0) || (matte == 0))
  {
    int cacheRow = get_local_id(1);
    for (int j = 0; j < filterHeight; j++, cacheRow++)
    {
      int cacheCol = get_local_id(0);
      for (int i = 0; i < filterWidth; i++, cacheCol++, filterIndex++)
      {
        CLPixelType p = pixelLocalCache[cacheRow*cacheSize.x + cacheCol];
        float f = filterCache[filterIndex];

        sum.x += f * p.x;
        sum.y += f * p.y;
        sum.z += f * p.z;
        sum.w += f * p.w;
        gamma += f;
      }
    }
  }
  else
  {
    int cacheRow = get_local_id(1);
    for (int j = 0; j < filterHeight; j++, cacheRow++)
    {
      int cacheCol = get_local_id(0);
      for (int i = 0; i < filterWidth; i++, cacheCol++, filterIndex++)
      {
        CLPixelType p = pixelLocalCache[cacheRow*cacheSize.x + cacheCol];
        float alpha = QuantumScale*(QuantumRange-p.w);
        float f = filterCache[filterIndex];
        float g = alpha * f;

        sum.x += g*p.x;
        sum.y += g*p.y;
        sum.z += g*p.z;
        sum.w += f*p.w;
        gamma += g;
      }
    }
    gamma = PerceptibleReciprocal(gamma);
    sum.xyz = gamma*sum.xyz;
  }

  CLPixelType outputPixel;
  outputPixel.x = ClampToQuantum(sum.x);
  outputPixel.y = ClampToQuantum(sum.y);
  outputPixel.z = ClampToQuantum(sum.z);
  if ((channel & OpacityChannel)!=0)
    outputPixel.w = ClampToQuantum(sum.w);
  else
    outputPixel.w = input[imageIndex.y * imageWidth + imageIndex.x].w;

  output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
}
/*
  Convolve: 2-D convolution reading every tap straight from `input`
  (no __local staging).  One work-item produces one output pixel at
  (get_global_id(0), get_global_id(1)); work-items outside the image return
  immediately.

  filter:         filterWidth*filterHeight taps, read row by row.
  matte, channel: when channel has no OpacityChannel bit, or matte is 0,
                  every component is a plain weighted sum.  Otherwise the
                  .x/.y/.z taps are additionally weighted by
                  QuantumScale*(QuantumRange - p.w) and the sums are
                  normalised by the accumulated weight.

  Source coordinates outside the image are clamped with ClampToCanvas.
  Output .w is the filtered value only when channel has the OpacityChannel
  bit; otherwise the input pixel's .w is copied.
*/
__kernel void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
  const uint imageWidth, const uint imageHeight,
  __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
  const uint matte, const ChannelType channel)
{
  int2 imageIndex;
  imageIndex.x = get_global_id(0);
  imageIndex.y = get_global_id(1);

  if (imageIndex.x >= imageWidth || imageIndex.y >= imageHeight)
    return;

  int2 halfFilter;
  halfFilter.x = (filterWidth-1)/2;
  halfFilter.y = (filterHeight-1)/2;

  int filterIndex = 0;
  float4 sum = (float4)0.0f;
  float gamma = 0.0f;

  if (((channel & OpacityChannel) == 0) || (matte == 0))
  {
    for (int j = 0; j < filterHeight; j++)
    {
      int2 src;
      src.y = imageIndex.y - halfFilter.y + j;
      src.y = ClampToCanvas(src.y, imageHeight);

      for (int i = 0; i < filterWidth; i++, filterIndex++)
      {
        src.x = imageIndex.x - halfFilter.x + i;
        src.x = ClampToCanvas(src.x, imageWidth);

        CLPixelType p = input[src.y * imageWidth + src.x];
        float f = filter[filterIndex];

        sum.x += f * p.x;
        sum.y += f * p.y;
        sum.z += f * p.z;
        sum.w += f * p.w;
        gamma += f;
      }
    }
  }
  else
  {
    for (int j = 0; j < filterHeight; j++)
    {
      int2 src;
      src.y = imageIndex.y - halfFilter.y + j;
      src.y = ClampToCanvas(src.y, imageHeight);

      for (int i = 0; i < filterWidth; i++, filterIndex++)
      {
        src.x = imageIndex.x - halfFilter.x + i;
        src.x = ClampToCanvas(src.x, imageWidth);

        CLPixelType p = input[src.y * imageWidth + src.x];
        float alpha = QuantumScale*(QuantumRange-p.w);
        float f = filter[filterIndex];
        float g = alpha * f;

        sum.x += g*p.x;
        sum.y += g*p.y;
        sum.z += g*p.z;
        sum.w += f*p.w;
        gamma += g;
      }
    }
    gamma = PerceptibleReciprocal(gamma);
    sum.xyz = gamma*sum.xyz;
  }

  CLPixelType outputPixel;
  outputPixel.x = ClampToQuantum(sum.x);
  outputPixel.y = ClampToQuantum(sum.y);
  outputPixel.z = ClampToQuantum(sum.z);
  if ((channel & OpacityChannel)!=0)
    outputPixel.w = ClampToQuantum(sum.w);
  else
    outputPixel.w = input[imageIndex.y * imageWidth + imageIndex.x].w;

  output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
}
/*
  MagickFunction: selects the per-channel transfer function evaluated by
  ApplyFunction().  Values are sequential from 0.
*/
typedef enum
{
  UndefinedFunction,
  PolynomialFunction,
  SinusoidFunction,
  ArcsinFunction,
  ArctanFunction
} MagickFunction;
/*
  ApplySinusoidToChannel: sinusoid transfer function for one channel value.
  Returns QuantumRange*(ampl*sin(2*pi*(freq*QuantumScale*value + phase/360)) + bias).
*/
inline float ApplySinusoidToChannel(const float value, const float freq,
  const float phase, const float ampl, const float bias)
{
  return QuantumRange*(ampl*sin(2.0f*MagickPI*
    (freq*QuantumScale*value + phase/360.0f)) + bias);
}

/*
  ApplyArcsinToChannel: arcsin transfer function for one channel value, in
  normalised units (the caller scales by QuantumRange).

  The asin argument is tested BEFORE asin is called: arguments at or beyond
  -1 / +1 are pegged to bias -/+ range/2, which are exactly the values the
  formula takes at asin(-1) and asin(+1).  Testing first keeps asin inside
  its domain (no NaN) and keeps the test independent of bias and range.
*/
inline float ApplyArcsinToChannel(const float value, const float width,
  const float center, const float range, const float bias)
{
  const float argument = 2.0f/width*(QuantumScale*value - center);

  if (argument <= -1.0f)
    return bias - range/2.0f;
  if (argument >= 1.0f)
    return bias + range/2.0f;
  return range/MagickPI*asin(argument)+bias;
}

/*
  ApplyFunction: evaluate a transfer function on all four components of a
  pixel and return the result clamped with ClampToQuantum.

  pixel:             input pixel.
  function:          which function to evaluate; UndefinedFunction (or any
                     unlisted value) yields ClampToQuantum(0) per component.
  number_parameters: count of valid entries in `parameters`.
  parameters:        function arguments.  Polynomial uses all of them as
                     coefficients, highest order first.  The other functions
                     read up to four and fall back to the defaults written
                     below when fewer are supplied.

  Fixes relative to the previous revision (Arcsin only):
    - the .z component fell back to result.x instead of result.z;
    - the range tests ran after asin had been applied and scaled, so
      out-of-domain inputs produced NaN and in-domain results were wrongly
      pegged whenever bias/range pushed them past +/-1.
*/
CLPixelType ApplyFunction(CLPixelType pixel,const MagickFunction function,
  const unsigned int number_parameters, __constant float *parameters)
{
  float4 result = (float4) 0.0f;

  switch (function)
  {
    case PolynomialFunction:
    {
      /* Horner evaluation on the normalised pixel value. */
      for (unsigned int i=0; i < number_parameters; i++)
        result = result*(float4)QuantumScale*convert_float4(pixel) + parameters[i];
      result *= (float4)QuantumRange;
      break;
    }
    case SinusoidFunction:
    {
      float freq,phase,ampl,bias;

      freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
      phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
      ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
      bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;

      result.x = ApplySinusoidToChannel((float)pixel.x, freq, phase, ampl, bias);
      result.y = ApplySinusoidToChannel((float)pixel.y, freq, phase, ampl, bias);
      result.z = ApplySinusoidToChannel((float)pixel.z, freq, phase, ampl, bias);
      result.w = ApplySinusoidToChannel((float)pixel.w, freq, phase, ampl, bias);
      break;
    }
    case ArcsinFunction:
    {
      float width,range,center,bias;

      width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
      center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
      range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
      bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;

      result.x = ApplyArcsinToChannel((float)pixel.x, width, center, range, bias);
      result.y = ApplyArcsinToChannel((float)pixel.y, width, center, range, bias);
      result.z = ApplyArcsinToChannel((float)pixel.z, width, center, range, bias);
      result.w = ApplyArcsinToChannel((float)pixel.w, width, center, range, bias);
      result *= (float4)QuantumRange;
      break;
    }
    case ArctanFunction:
    {
      float slope,range,center,bias;

      slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
      center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
      range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
      bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;

      result = (float4)MagickPI*(float4)slope*((float4)QuantumScale*convert_float4(pixel)-(float4)center);
      result = (float4)QuantumRange*((float4)range/(float4)MagickPI*atan(result) + (float4)bias);
      break;
    }
    case UndefinedFunction:
    default:
      break;
  }

  return (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
    ClampToQuantum(result.z), ClampToQuantum(result.w));
}
/*
  FunctionImage: replace every pixel of `im` in place with
  ApplyFunction(pixel, function, number_parameters, parameters).

  The row stride is get_global_size(0), so each work-item addresses
  im[x + y*get_global_size(0)].  `channel` is not referenced in this body.
*/
__kernel void FunctionImage(__global CLPixelType *im, const ChannelType channel,
  const MagickFunction function, const unsigned int number_parameters,
  __constant float *parameters)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  const int stride = get_global_size(0);
  const int index = col + row * stride;

  const CLPixelType before = im[index];
  im[index] = ApplyFunction(before, function, number_parameters, parameters);
}
/*
  Stretch: remap selected channels of every pixel through `stretch_map`,
  in place.

  im:          image, row stride get_global_size(0).
  channel:     bit mask of channels to remap (Red/Green/Blue/Opacity bits).
  stretch_map: lookup table indexed by ScaleQuantumToMap(channel value); the
               replacement for a channel is the same channel of the table
               entry.
  white,black: per-channel end points; a channel whose white and black
               values are equal is left untouched.

  Every output component starts as a copy of the input component, so a
  channel that is not selected, or whose end points coincide, passes through
  unchanged.  The previous revision left those components uninitialised and
  then wrote them back to the image.
*/
__kernel void Stretch(__global CLPixelType * restrict im, const ChannelType channel,
  __global CLPixelType * restrict stretch_map, const float4 white, const float4 black)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  const int columns = get_global_size(0);
  const int c = x + y * columns;

  const CLPixelType oValue = im[c];

  /* Default to the original values; only remapped channels are replaced. */
  CLQuantum red = getRed(oValue);
  CLQuantum green = getGreen(oValue);
  CLQuantum blue = getBlue(oValue);
  CLQuantum opacity = getOpacity(oValue);

  if (((channel & RedChannel) != 0) && (getRedF4(white) != getRedF4(black)))
  {
    const uint ePos = ScaleQuantumToMap(getRed(oValue));
    red = getRed(stretch_map[ePos]);
  }

  if (((channel & GreenChannel) != 0) && (getGreenF4(white) != getGreenF4(black)))
  {
    const uint ePos = ScaleQuantumToMap(getGreen(oValue));
    green = getGreen(stretch_map[ePos]);
  }

  if (((channel & BlueChannel) != 0) && (getBlueF4(white) != getBlueF4(black)))
  {
    const uint ePos = ScaleQuantumToMap(getBlue(oValue));
    blue = getBlue(stretch_map[ePos]);
  }

  if (((channel & OpacityChannel) != 0) && (getOpacityF4(white) != getOpacityF4(black)))
  {
    const uint ePos = ScaleQuantumToMap(getOpacity(oValue));
    opacity = getOpacity(stretch_map[ePos]);
  }

  /* Component order is .x = blue, .y = green, .z = red, .w = opacity. */
  im[c]=(CLPixelType)(blue, green, red, opacity);
}
/*
  Equalize: remap all four components of every pixel through the RED
  entries of `equalize_map`, in place.

  The pixel is rewritten only when channel has the SyncChannels bit and the
  red components of `white` and `black` differ; otherwise it is left as is.
  Row stride is get_global_size(0).
*/
__kernel void Equalize(__global CLPixelType * restrict im, const ChannelType channel,
  __global CLPixelType * restrict equalize_map, const float4 white, const float4 black)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  const int stride = get_global_size(0);
  const int index = col + row * stride;

  const CLPixelType source = im[index];

  if ((channel & SyncChannels) == 0)
    return;
  if (!(getRedF4(white) != getRedF4(black)))
    return;

  /* Every channel is looked up by its own value but takes the map's red. */
  const CLQuantum red = getRed(equalize_map[ScaleQuantumToMap(getRed(source))]);
  const CLQuantum green = getRed(equalize_map[ScaleQuantumToMap(getGreen(source))]);
  const CLQuantum blue = getRed(equalize_map[ScaleQuantumToMap(getBlue(source))]);
  const CLQuantum opacity = getRed(equalize_map[ScaleQuantumToMap(getOpacity(source))]);

  im[index]=(CLPixelType)(blue, green, red, opacity);
}
/*
  Histogram: count pixel intensities.

  For each pixel, when channel has the SyncChannels bit, the intensity from
  GetPixelIntensity(method, colorspace, pixel) is clamped to a quantum,
  mapped to a bin with ScaleQuantumToMap, and the third uint (index 2) of
  histogram[bin] is atomically incremented.  Nothing is done when the
  SyncChannels bit is absent.  Row stride is get_global_size(0).
*/
__kernel void Histogram(__global CLPixelType * restrict im, const ChannelType channel,
  const int method, const int colorspace, __global uint4 * restrict histogram)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);
  const int stride = get_global_size(0);
  const int index = col + row * stride;

  if ((channel & SyncChannels) == 0)
    return;

  float intensity = GetPixelIntensity(method, colorspace,im[index]);
  uint bin = ScaleQuantumToMap(ClampToQuantum(intensity));

  /* View the uint4 bin as four uints and bump the one at offset 2. */
  __global uint *counters = (__global uint *)(&(histogram[bin]));
  atomic_inc(counters+2);
}
/* Inner-loop unroll factor for the blur kernels; may be predefined by the build. */
#ifndef UFACTOR
#define UFACTOR 8
#endif

/*
  BlurRow: horizontal 1-D filter pass.

  im:           source pixels, imageColumns per row.
  filtered_im:  float4 destination, same stride; each component is the
                ClampToQuantum'd filtered value.
  filter:       `width` taps.
  temp:         __local scratch for get_local_size(0)+width pixels.

  The work-group first loads its span of row y into `temp`, starting
  `radius` columns left of the group's first column, with column indexes
  clamped by ClampToCanvas.  Work-items whose column is below imageColumns
  then accumulate the taps and store one float4.
  `channel` and `imageRows` are not referenced in this body.
*/
__kernel void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
  const ChannelType channel, __constant float *filter, const unsigned int width,
  const unsigned int imageColumns, const unsigned int imageRows,
  __local CLPixelType *temp)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  const int columns = imageColumns;
  const unsigned int radius = (width-1)/2;
  const int localWidth = get_local_size(0);
  const unsigned int spanSize = localWidth+width;
  const int groupFirstX=get_local_size(0)*get_group_id(0);

  int slot = get_local_id(0);
  while (slot < spanSize)
  {
    temp[slot] = im[y * columns + ClampToCanvas(slot+groupFirstX-radius, columns)];
    slot = slot+get_local_size(0);
  }
  barrier(CLK_LOCAL_MEM_FENCE);

  if (get_global_id(0) < columns)
  {
    float4 acc = (float4) 0;
    int tap = 0;

    /* Full UFACTOR-sized chunks first, then the remaining taps. */
    while (tap+UFACTOR < width)
    {
#pragma unroll UFACTOR
      for (int j=0; j < UFACTOR; j++, tap++)
      {
        acc+=filter[tap]*convert_float4(temp[tap+get_local_id(0)]);
      }
    }
    while (tap < width)
    {
      acc+=filter[tap]*convert_float4(temp[tap+get_local_id(0)]);
      tap++;
    }

    acc.x = ClampToQuantum(acc.x);
    acc.y = ClampToQuantum(acc.y);
    acc.z = ClampToQuantum(acc.z);
    acc.w = ClampToQuantum(acc.w);
    filtered_im[y*columns+x] = acc;
  }
}
/* Inner-loop unroll factor for the blur kernels; may be predefined by the build. */
#ifndef UFACTOR
#define UFACTOR 8
#endif

/*
  BlurRowSection: horizontal 1-D filter pass over a band of rows.

  Same computation as a whole-image row pass, except that `im` is first
  advanced by imageColumns * (offsetRows - radius*section) pixels, so row
  index y is relative to that position.  `filtered_im` is not offset.

  filter:  `width` taps.
  temp:    __local scratch for get_local_size(0)+width pixels.
  `channel` and `imageRows` are not referenced in this body.
*/
__kernel void BlurRowSection(__global CLPixelType *im, __global float4 *filtered_im,
  const ChannelType channel, __constant float *filter, const unsigned int width,
  const unsigned int imageColumns, const unsigned int imageRows,
  __local CLPixelType *temp, const unsigned int offsetRows, const unsigned int section)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  const int columns = imageColumns;
  const unsigned int radius = (width-1)/2;
  const int localWidth = get_local_size(0);
  const unsigned int spanSize = localWidth+width;
  const int groupFirstX=get_local_size(0)*get_group_id(0);

  /* Re-base the source pointer onto the band being processed. */
  im += imageColumns * (offsetRows - radius * section);

  int slot = get_local_id(0);
  while (slot < spanSize)
  {
    temp[slot] = im[y * columns + ClampToCanvas(slot+groupFirstX-radius, columns)];
    slot = slot+get_local_size(0);
  }
  barrier(CLK_LOCAL_MEM_FENCE);

  if (get_global_id(0) < columns)
  {
    float4 acc = (float4) 0;
    int tap = 0;

    /* Full UFACTOR-sized chunks first, then the remaining taps. */
    while (tap+UFACTOR < width)
    {
#pragma unroll UFACTOR
      for (int j=0; j < UFACTOR; j++, tap++)
      {
        acc+=filter[tap]*convert_float4(temp[tap+get_local_id(0)]);
      }
    }
    while (tap < width)
    {
      acc+=filter[tap]*convert_float4(temp[tap+get_local_id(0)]);
      tap++;
    }

    acc.x = ClampToQuantum(acc.x);
    acc.y = ClampToQuantum(acc.y);
    acc.z = ClampToQuantum(acc.z);
    acc.w = ClampToQuantum(acc.w);
    filtered_im[y*columns+x] = acc;
  }
}
/* Inner-loop unroll factor for the blur kernels; may be predefined by the build. */
#ifndef UFACTOR
#define UFACTOR 8
#endif

/*
  BlurColumn: vertical 1-D filter pass over float4 row-pass results.

  blurRowData:  float4 input, imageColumns per row.
  filtered_im:  destination pixels, same stride.
  filter:       `width` taps.
  temp:         __local scratch for get_local_size(1)+width float4 values.

  The work-group loads a column span into `temp`, starting `radius` rows
  above the group's first row, with row indexes clamped by ClampToCanvas.
  The column read is get_local_size(0)*get_group_id(0).  Work-items whose
  row is below imageRows accumulate the taps, clamp each component with
  ClampToQuantum and store one pixel.  `channel` is not referenced here.
*/
__kernel void BlurColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
  const ChannelType channel, __constant float *filter, const unsigned int width,
  const unsigned int imageColumns, const unsigned int imageRows,
  __local float4 *temp)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  const int columns = imageColumns;
  const int rows = imageRows;
  unsigned int radius = (width-1)/2;
  const int localHeight = get_local_size(1);
  const unsigned int spanSize = localHeight+width;
  const int groupFirstX=get_local_size(0)*get_group_id(0);
  const int groupFirstY=get_local_size(1)*get_group_id(1);

  int slot = get_local_id(1);
  while (slot < spanSize)
  {
    temp[slot] = blurRowData[ClampToCanvas(slot+groupFirstY-radius, rows) * columns + groupFirstX];
    slot = slot+get_local_size(1);
  }
  barrier(CLK_LOCAL_MEM_FENCE);

  if (get_global_id(1) < rows)
  {
    float4 acc = (float4) 0;
    int tap = 0;

    /* Full UFACTOR-sized chunks first, then the remaining taps. */
    while (tap+UFACTOR < width)
    {
#pragma unroll UFACTOR
      for (int j=0; j < UFACTOR; j++, tap++)
      {
        acc+=filter[tap]*temp[tap+get_local_id(1)];
      }
    }
    while (tap < width)
    {
      acc+=filter[tap]*temp[tap+get_local_id(1)];
      tap++;
    }

    acc.x = ClampToQuantum(acc.x);
    acc.y = ClampToQuantum(acc.y);
    acc.z = ClampToQuantum(acc.z);
    acc.w = ClampToQuantum(acc.w);
    filtered_im[y*columns+x] = (CLPixelType) (acc.x,acc.y,acc.z,acc.w);
  }
}
/* Inner-loop unroll factor for the blur kernels; may be predefined by the build. */
#ifndef UFACTOR
#define UFACTOR 8
#endif

/*
  BlurColumnSection: vertical 1-D filter pass over a band of rows.

  blurRowData is advanced by imageColumns*radius*section before reading, and
  row indexes are limited with ClampToCanvasWithHalo(…, rows, radius,
  section) instead of ClampToCanvas.  Just before the store, filtered_im is
  advanced by imageColumns*offsetRows.

  filter:  `width` taps.
  temp:    __local scratch for get_local_size(1)+width float4 values.
  `channel` is not referenced in this body.
*/
__kernel void BlurColumnSection(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
  const ChannelType channel, __constant float *filter, const unsigned int width,
  const unsigned int imageColumns, const unsigned int imageRows,
  __local float4 *temp, const unsigned int offsetRows, const unsigned int section)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  const int columns = imageColumns;
  const int rows = imageRows;
  unsigned int radius = (width-1)/2;
  const int localHeight = get_local_size(1);
  const unsigned int spanSize = localHeight+width;
  const int groupFirstX=get_local_size(0)*get_group_id(0);
  const int groupFirstY=get_local_size(1)*get_group_id(1);

  /* Re-base the input pointer for this section. */
  blurRowData += imageColumns * radius * section;

  int slot = get_local_id(1);
  while (slot < spanSize)
  {
    int pos = ClampToCanvasWithHalo(slot+groupFirstY-radius, rows, radius, section) * columns + groupFirstX;
    temp[slot] = *(blurRowData+pos);
    slot = slot+get_local_size(1);
  }
  barrier(CLK_LOCAL_MEM_FENCE);

  if (get_global_id(1) < rows)
  {
    float4 acc = (float4) 0;
    int tap = 0;

    /* Full UFACTOR-sized chunks first, then the remaining taps. */
    while (tap+UFACTOR < width)
    {
#pragma unroll UFACTOR
      for (int j=0; j < UFACTOR; j++, tap++)
      {
        acc+=filter[tap]*temp[tap+get_local_id(1)];
      }
    }
    while (tap < width)
    {
      acc+=filter[tap]*temp[tap+get_local_id(1)];
      tap++;
    }

    acc.x = ClampToQuantum(acc.x);
    acc.y = ClampToQuantum(acc.y);
    acc.z = ClampToQuantum(acc.z);
    acc.w = ClampToQuantum(acc.w);

    filtered_im += imageColumns * offsetRows;
    filtered_im[y*columns+x] = (CLPixelType) (acc.x,acc.y,acc.z,acc.w);
  }
}
/* Inner-loop unroll factor for the blur kernels; may be predefined by the build. */
#ifndef UFACTOR
#define UFACTOR 8
#endif

/*
  UnsharpMaskBlurColumn: vertical filter pass fused with the unsharp-mask
  combine step.

  inputImage:   original pixels, imageColumns per row.
  blurRowData:  float4 row-pass results, same stride.
  filtered_im:  destination pixels, same stride.
  cachedData:   __local scratch for get_local_size(1) + 2*radius float4s.
  cachedFilter: __local scratch for `width` floats.
  filter:       `width` taps in __global memory.
  gain, threshold: combine parameters, see below.
  `channel` is not referenced in this body.

  Column handled is get_group_id(0).  When the whole span
  [groupStartY, groupStopY) lies inside the image it is fetched with
  async_work_group_strided_copy; otherwise the work-items load it
  themselves with row indexes clamped by ClampToCanvas.

  Combine: blurred = floor(ClampToQuantum(filtered)); diff = input - blurred;
  where |2*diff| < QuantumRange*threshold the input pixel is kept,
  elsewhere the output is input + diff*gain, clamped with ClampToQuantum.
*/
__kernel void UnsharpMaskBlurColumn(const __global CLPixelType* inputImage,
  const __global float4 *blurRowData, __global CLPixelType *filtered_im,
  const unsigned int imageColumns, const unsigned int imageRows,
  __local float4* cachedData, __local float* cachedFilter,
  const ChannelType channel, const __global float *filter,
  const unsigned int width, const float gain, const float threshold)
{
  const unsigned int radius = (width-1)/2;

  const int groupX = get_group_id(0);
  const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
  const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;

  if (groupStartY >= 0 && groupStopY < imageRows)
  {
    event_t columnCopy = async_work_group_strided_copy(cachedData,
      blurRowData+groupStartY*imageColumns+groupX,
      groupStopY-groupStartY, imageColumns, 0);
    wait_group_events(1,&columnCopy);
  }
  else
  {
    int slot = get_local_id(1);
    while (slot < (groupStopY - groupStartY))
    {
      cachedData[slot] = blurRowData[ClampToCanvas(groupStartY+slot,imageRows)*imageColumns+ groupX];
      slot+=get_local_size(1);
    }
    barrier(CLK_LOCAL_MEM_FENCE);
  }

  event_t filterCopy = async_work_group_copy(cachedFilter,filter,width,0);
  wait_group_events(1,&filterCopy);

  const int cy = get_global_id(1);

  if (cy < imageRows)
  {
    float4 blurredPixel = (float4) 0.0f;
    int tap = 0;

    /* Full UFACTOR-sized chunks first, then the remaining taps. */
    while (tap+UFACTOR < width)
    {
#pragma unroll UFACTOR
      for (int j=0; j < UFACTOR; j++, tap++)
      {
        blurredPixel+=cachedFilter[tap]*cachedData[tap+get_local_id(1)];
      }
    }
    while (tap < width)
    {
      blurredPixel+=cachedFilter[tap]*cachedData[tap+get_local_id(1)];
      tap++;
    }

    blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x),
      ClampToQuantum(blurredPixel.y), ClampToQuantum(blurredPixel.z),
      ClampToQuantum(blurredPixel.w)));

    float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
    float4 outputPixel = inputImagePixel - blurredPixel;

    float quantumThreshold = QuantumRange*threshold;

    /* mask is set where the difference is too small to sharpen. */
    int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
    outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);

    filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x),
      ClampToQuantum(outputPixel.y), ClampToQuantum(outputPixel.z),
      ClampToQuantum(outputPixel.w));
  }
}

/*
  UnsharpMaskBlurColumnSection: the same fused pass applied to a band of
  rows.

  Differences from UnsharpMaskBlurColumn:
    - blurRowData is advanced by imageColumns*radius*section before use;
    - the fallback load limits rows with
      ClampToCanvasWithHalo(…, imageRows, radius, section);
    - inputImage and filtered_im are both advanced by
      imageColumns*offsetRows before the combine step.
*/
__kernel void UnsharpMaskBlurColumnSection(const __global CLPixelType* inputImage,
  const __global float4 *blurRowData, __global CLPixelType *filtered_im,
  const unsigned int imageColumns, const unsigned int imageRows,
  __local float4* cachedData, __local float* cachedFilter,
  const ChannelType channel, const __global float *filter,
  const unsigned int width, const float gain, const float threshold,
  const unsigned int offsetRows, const unsigned int section)
{
  const unsigned int radius = (width-1)/2;

  const int groupX = get_group_id(0);
  const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
  const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;

  /* Re-base the row-pass data for this section. */
  blurRowData += imageColumns * radius * section;

  if (groupStartY >= 0 && groupStopY < imageRows)
  {
    event_t columnCopy = async_work_group_strided_copy(cachedData,
      blurRowData+groupStartY*imageColumns+groupX,
      groupStopY-groupStartY, imageColumns, 0);
    wait_group_events(1,&columnCopy);
  }
  else
  {
    int slot = get_local_id(1);
    while (slot < (groupStopY - groupStartY))
    {
      int pos = ClampToCanvasWithHalo(groupStartY+slot,imageRows, radius, section)*imageColumns+ groupX;
      cachedData[slot] = *(blurRowData + pos);
      slot+=get_local_size(1);
    }
    barrier(CLK_LOCAL_MEM_FENCE);
  }

  event_t filterCopy = async_work_group_copy(cachedFilter,filter,width,0);
  wait_group_events(1,&filterCopy);

  const int cy = get_global_id(1);

  if (cy < imageRows)
  {
    float4 blurredPixel = (float4) 0.0f;
    int tap = 0;

    /* Full UFACTOR-sized chunks first, then the remaining taps. */
    while (tap+UFACTOR < width)
    {
#pragma unroll UFACTOR
      for (int j=0; j < UFACTOR; j++, tap++)
      {
        blurredPixel+=cachedFilter[tap]*cachedData[tap+get_local_id(1)];
      }
    }
    while (tap < width)
    {
      blurredPixel+=cachedFilter[tap]*cachedData[tap+get_local_id(1)];
      tap++;
    }

    blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x),
      ClampToQuantum(blurredPixel.y), ClampToQuantum(blurredPixel.z),
      ClampToQuantum(blurredPixel.w)));

    /* Shift both image pointers to the first row of this band. */
    inputImage += imageColumns * offsetRows;
    filtered_im += imageColumns * offsetRows;

    float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
    float4 outputPixel = inputImagePixel - blurredPixel;

    float quantumThreshold = QuantumRange*threshold;

    /* mask is set where the difference is too small to sharpen. */
    int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
    outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);

    filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x),
      ClampToQuantum(outputPixel.y), ClampToQuantum(outputPixel.z),
      ClampToQuantum(outputPixel.w));
  }
}
/*
  UnsharpMask: separable blur with an optional unsharp-mask step.

  Pass 1 filters rows of `im` horizontally with the `width` taps in `filter`
  and stores the float4 results in the __local buffer `pixels`.  A work-group
  fills get_local_size(1) + 2*radius rows of get_local_size(0) entries, where
  radius = (width-1)/2.  Source coordinates that fall outside the image are
  mirrored back inside.

  Pass 2 (after the local-memory barrier) filters that buffer vertically.

  When justBlur is 0 the blurred value is compared with the source pixel:
  where fabs(2*diff) is less than QuantumRange*threshold the source pixel is
  kept, otherwise source + diff*gain is written.  When justBlur is non-zero
  the blurred value itself is written.  Nothing is read from or written to
  the pixel at (x,y) unless x < imageColumns and y < imageRows.
*/
__kernel void UnsharpMask(__global CLPixelType *im,
  __global CLPixelType *filtered_im, __constant float *filter,
  const unsigned int width, const unsigned int imageColumns,
  const unsigned int imageRows, __local float4 *pixels, const float gain,
  const float threshold, const unsigned int justBlur)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);

  const unsigned int radius = (width - 1) / 2;

  int row = y - radius;
  int baseRow = get_group_id(1) * get_local_size(1) - radius;
  int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;

  /* Pass 1: horizontal filter into local memory, one row per iteration. */
  while (row < endRow) {
    int srcy = (row < 0) ? -row : row;
    srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy;

    float4 value = 0.0f;

    int ix = x - radius;
    int i = 0;

    while (i + 7 < width) {
      for (int j = 0; j < 8; ++j) {
        int srcx = ix + j;
        srcx = (srcx < 0) ? -srcx : srcx;
        srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
        value += filter[i + j] * convert_float4(im[srcx + srcy * imageColumns]);
      }
      ix += 8;
      i += 8;
    }

    while (i < width) {
      int srcx = (ix < 0) ? -ix : ix;
      srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
      value += filter[i] * convert_float4(im[srcx + srcy * imageColumns]);
      ++i;
      ++ix;
    }

    pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
    row += get_local_size(1);
  }

  /* Every work-item must reach this barrier before pass 2 reads `pixels`. */
  barrier(CLK_LOCAL_MEM_FENCE);

  const int px = get_local_id(0);
  const int py = get_local_id(1);
  const int prp = get_local_size(0);

  /* Pass 2: vertical filter over the cached rows. */
  float4 value = (float4)(0.0f);

  int i = 0;
  while (i + 7 < width) {
    /*
      Row py+i+j must be weighted by its own tap filter[i+j], exactly as in
      pass 1 and in the remainder loop below; weighting all eight rows with
      filter[i] would apply the wrong coefficients.
    */
    for (int j = 0; j < 8; ++j) {
      value += (float4)(filter[i + j]) * pixels[px + (py + i + j) * prp];
    }
    i += 8;
  }
  while (i < width) {
    value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
    ++i;
  }

  /*
    Work-items outside the image took part in the cooperative load above but
    must neither read the source pixel nor write a result.
  */
  if ((x < imageColumns) && (y < imageRows)) {
    if (justBlur == 0) {
      float4 srcPixel = convert_float4(im[x + y * imageColumns]);
      float4 diff = srcPixel - value;

      float quantumThreshold = QuantumRange*threshold;

      int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
      value = select(srcPixel + diff * gain, srcPixel, mask);
    }

    filtered_im[x + y * imageColumns] = (CLPixelType)(ClampToQuantum(value.s0),
      ClampToQuantum(value.s1), ClampToQuantum(value.s2),
      ClampToQuantum(value.s3));
  }
}
/*
  HullPass1: first hull pass.

  For the pixel at (x,y) the neighbour at (x,y)+offset is fetched; a
  neighbour whose coordinates are changed by ClampToCanvas is replaced by
  (CLPixelType)0.  With polarity > 0 a channel is raised by
  ScaleCharToQuantum(1) when the neighbour is at least ScaleCharToQuantum(2)
  above it; otherwise it is lowered by ScaleCharToQuantum(1) when the
  neighbour is at least ScaleCharToQuantum(2) below it.  The w channel is
  only written back when matte is non-zero.
*/
__kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
  , const unsigned int imageWidth, const unsigned int imageHeight
  , const int2 offset, const int polarity, const int matte)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);

  CLPixelType v = inputImage[y*imageWidth+x];

  int2 neighbor;
  neighbor.y = y + offset.y;
  neighbor.x = x + offset.x;

  int2 clampedNeighbor;
  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);

  /* Only use the neighbour when clamping left its coordinates untouched. */
  CLPixelType r = (CLPixelType)0;
  if (clampedNeighbor.x == neighbor.x && clampedNeighbor.y == neighbor.y)
    r = inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x];

  int sv[4] = { (int)v.x, (int)v.y, (int)v.z, (int)v.w };
  int sr[4] = { (int)r.x, (int)r.y, (int)r.z, (int)r.w };

  if (polarity > 0)
  {
#pragma unroll 4
    for (unsigned int ch = 0; ch < 4; ch++)
    {
      sv[ch] = (sr[ch] >= (sv[ch]+ScaleCharToQuantum(2)))?(sv[ch]+ScaleCharToQuantum(1)):sv[ch];
    }
  }
  else
  {
#pragma unroll 4
    for (unsigned int ch = 0; ch < 4; ch++)
    {
      sv[ch] = (sr[ch] <= (sv[ch]-ScaleCharToQuantum(2)))?(sv[ch]-ScaleCharToQuantum(1)):sv[ch];
    }
  }

  v.x = (CLQuantum)sv[0];
  v.y = (CLQuantum)sv[1];
  v.z = (CLQuantum)sv[2];
  if (matte!=0)
    v.w = (CLQuantum)sv[3];

  outputImage[y*imageWidth+x] = v;
}
/*
  HullPass2: second hull pass.

  Two neighbours are fetched for the pixel at (x,y): r at (x,y)+offset and s
  at (x,y)-offset.  A neighbour whose coordinates are changed by
  ClampToCanvas is replaced by (CLPixelType)0.  With polarity > 0 a channel
  is raised by ScaleCharToQuantum(1) unless s is below
  value+ScaleCharToQuantum(2) or r is not above the value; otherwise it is
  lowered by ScaleCharToQuantum(1) unless s is above
  value-ScaleCharToQuantum(2) or r is not below the value.  The w channel is
  only written back when matte is non-zero.
*/
__kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
  , const unsigned int imageWidth, const unsigned int imageHeight
  , const int2 offset, const int polarity, const int matte)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);

  CLPixelType v = inputImage[y*imageWidth+x];

  int2 neighbor, clampedNeighbor;

  /* Neighbour in the +offset direction. */
  neighbor.y = y + offset.y;
  neighbor.x = x + offset.x;
  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);

  CLPixelType r = (CLPixelType)0;
  if (clampedNeighbor.x == neighbor.x && clampedNeighbor.y == neighbor.y)
    r = inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x];

  /* Neighbour in the -offset direction. */
  neighbor.y = y - offset.y;
  neighbor.x = x - offset.x;
  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);

  CLPixelType s = (CLPixelType)0;
  if (clampedNeighbor.x == neighbor.x && clampedNeighbor.y == neighbor.y)
    s = inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x];

  int sv[4] = { (int)v.x, (int)v.y, (int)v.z, (int)v.w };
  int sr[4] = { (int)r.x, (int)r.y, (int)r.z, (int)r.w };
  int ss[4] = { (int)s.x, (int)s.y, (int)s.z, (int)s.w };

  if (polarity > 0)
  {
#pragma unroll 4
    for (unsigned int ch = 0; ch < 4; ch++)
    {
      const int keep = (ss[ch] < (sv[ch]+ScaleCharToQuantum(2))) || (sr[ch] <= sv[ch]);
      sv[ch] = keep ? sv[ch]:(sv[ch]+ScaleCharToQuantum(1));
    }
  }
  else
  {
#pragma unroll 4
    for (unsigned int ch = 0; ch < 4; ch++)
    {
      const int keep = (ss[ch] > (sv[ch]-ScaleCharToQuantum(2))) || (sr[ch] >= sv[ch]);
      sv[ch] = keep ? sv[ch]:(sv[ch]-ScaleCharToQuantum(1));
    }
  }

  v.x = (CLQuantum)sv[0];
  v.y = (CLQuantum)sv[1];
  v.z = (CLQuantum)sv[2];
  if (matte!=0)
    v.w = (CLQuantum)sv[3];

  outputImage[y*imageWidth+x] = v;
}
/*
  RadialBlur: averages samples of `im` taken along a rotation around
  blurCenter.

  The image size is taken from the global work size (columns =
  get_global_size(0), rows = get_global_size(1)).  cos_theta and sin_theta
  hold cossin_theta_size rotation coefficients; they are visited with a
  stride `step` derived from hypot(blurCenter)/distance-to-centre, so pixels
  farther from the centre use more samples.  Sample coordinates are passed
  through ClampToCanvas.

  When the OpacityChannel bit of `channel` is clear or matte is 0, the
  result is bias plus the sample sum, scaled by the reciprocal of the sample
  count.  Otherwise x/y/z are weighted by alpha =
  QuantumScale*(QuantumRange-p.w) and normalised by the summed alpha, while w
  is normalised by the sample count.
*/
__kernel void RadialBlur(const __global CLPixelType *im, __global CLPixelType *filtered_im,
  const float4 bias,
  const unsigned int channel, const unsigned int matte,
  const float2 blurCenter,
  __constant float *cos_theta, __constant float *sin_theta,
  const unsigned int cossin_theta_size)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  const int columns = get_global_size(0);
  const int rows = get_global_size(1);

  unsigned int step = 1;
  float center_x = (float) x - blurCenter.x;
  float center_y = (float) y - blurCenter.y;
  float radius = hypot(center_x, center_y);

  float blur_radius = hypot(blurCenter.x, blurCenter.y);

  if (radius > MagickEpsilon)
  {
    step = (unsigned int) (blur_radius / radius);
    if (step >= cossin_theta_size)
      step = cossin_theta_size-1;
    /*
      Apply the lower bound last: with a single-entry table the upper clamp
      yields 0, and a zero stride would make the sampling loops below spin
      forever.
    */
    if (step == 0)
      step = 1;
  }

  float4 result;
  result.x = (float)bias.x;
  result.y = (float)bias.y;
  result.z = (float)bias.z;
  result.w = (float)bias.w;
  float normalize = 0.0f;

  if (((channel & OpacityChannel) == 0) || (matte == 0))
  {
    for (unsigned int i=0; i<cossin_theta_size; i+=step)
    {
      result += convert_float4(im[
        ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
        ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
      normalize += 1.0f;
    }
    normalize = PerceptibleReciprocal(normalize);
    result = result * normalize;
  }
  else
  {
    float gamma = 0.0f;
    for (unsigned int i=0; i<cossin_theta_size; i+=step)
    {
      float4 p = convert_float4(im[
        ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
        ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);

      float alpha = (float)(QuantumScale*(QuantumRange-p.w));
      result.x += alpha * p.x;
      result.y += alpha * p.y;
      result.z += alpha * p.z;
      result.w += p.w;
      gamma+=alpha;
      normalize += 1.0f;
    }
    gamma = PerceptibleReciprocal(gamma);
    normalize = PerceptibleReciprocal(normalize);
    result.x = gamma*result.x;
    result.y = gamma*result.y;
    result.z = gamma*result.z;
    result.w = normalize*result.w;
  }

  filtered_im[y * columns + x] = (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
    ClampToQuantum(result.z), ClampToQuantum(result.w));
}
/*
  ConvertRGBToHSB: converts the red/green/blue channels of `pixel` to
  hue (.x), saturation (.y) and brightness (.z).

  All three components are 0 when the largest channel is 0; hue stays 0 when
  the three channels are equal.  A negative intermediate hue (largest channel
  is red and green < blue) is wrapped by adding 1, so the returned hue is
  never negative.

  Declared static inline so that no external definition is required when the
  compiler decides not to inline a call.
*/
static inline float3 ConvertRGBToHSB(CLPixelType pixel)
{
  float3 HueSaturationBrightness;
  HueSaturationBrightness.x = 0.0f;
  HueSaturationBrightness.y = 0.0f;
  HueSaturationBrightness.z = 0.0f;

  const float r=(float) getRed(pixel);
  const float g=(float) getGreen(pixel);
  const float b=(float) getBlue(pixel);

  const float tmin=min(min(r,g),b);
  const float tmax=max(max(r,g),b);

  if (tmax == 0.0f)
    return HueSaturationBrightness;

  const float delta=tmax-tmin;
  HueSaturationBrightness.y=delta/tmax;
  HueSaturationBrightness.z=QuantumScale*tmax;

  if (delta != 0.0f)
  {
    float sextantBase;
    float offset;

    if (r == tmax)
    {
      sextantBase=0.0f;
      offset=g-b;
    }
    else if (g == tmax)
    {
      sextantBase=2.0f;
      offset=b-r;
    }
    else
    {
      sextantBase=4.0f;
      offset=r-g;
    }

    HueSaturationBrightness.x = sextantBase;
    HueSaturationBrightness.x += offset/delta;
    HueSaturationBrightness.x/=6.0f;
    /* Wrap negative hues into range; non-negative hues are left alone. */
    HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?1.0f:0.0f;
  }

  return HueSaturationBrightness;
}

/*
  ConvertHSBToRGB: converts hue (.x), saturation (.y) and brightness (.z)
  back to a pixel.  Only the fractional part of the hue is used.  Only the
  red, green and blue channels of the returned pixel are set here.
*/
static inline CLPixelType ConvertHSBToRGB(float3 HueSaturationBrightness)
{
  const float hue = HueSaturationBrightness.x;
  const float brightness = HueSaturationBrightness.z;
  const float saturation = HueSaturationBrightness.y;

  CLPixelType rgb;

  if (saturation == 0.0f)
  {
    /* No colour: all three channels carry the brightness. */
    setRed(&rgb,ClampToQuantum(QuantumRange*brightness));
    setGreen(&rgb,getRed(rgb));
    setBlue(&rgb,getRed(rgb));
    return rgb;
  }

  const float h=6.0f*(hue-floor(hue));
  const float f=h-floor(h);
  const float p=brightness*(1.0f-saturation);
  const float q=brightness*(1.0f-saturation*f);
  const float t=brightness*(1.0f-(saturation*(1.0f-f)));

  const float clampedBrightness = ClampToQuantum(QuantumRange*brightness);
  const float clamped_t = ClampToQuantum(QuantumRange*t);
  const float clamped_p = ClampToQuantum(QuantumRange*p);
  const float clamped_q = ClampToQuantum(QuantumRange*q);

  float red;
  float green;
  float blue;

  switch ((int)h)
  {
    case 1:
      red=clamped_q; green=clampedBrightness; blue=clamped_p;
      break;
    case 2:
      red=clamped_p; green=clampedBrightness; blue=clamped_t;
      break;
    case 3:
      red=clamped_p; green=clamped_q; blue=clampedBrightness;
      break;
    case 4:
      red=clamped_t; green=clamped_p; blue=clampedBrightness;
      break;
    case 5:
      red=clampedBrightness; green=clamped_p; blue=clamped_q;
      break;
    default:
      red=clampedBrightness; green=clamped_t; blue=clamped_p;
      break;
  }

  setRed(&rgb, red);
  setGreen(&rgb, green);
  setBlue(&rgb, blue);

  return rgb;
}

/*
  Contrast: in-place contrast adjustment, one work-item per pixel.  The row
  stride is get_global_size(0).  The brightness of each pixel is moved along
  a sinusoidal curve, towards it when sharpen is non-zero and away from it
  otherwise, then clamped to [0,1].  The w channel is copied through
  unchanged.
*/
__kernel void Contrast(__global CLPixelType *im, const unsigned int sharpen)
{
  const int sign = sharpen!=0?1:-1;
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  const int columns = get_global_size(0);
  const int c = x + y * columns;

  CLPixelType pixel = im[c];

  float3 HueSaturationBrightness = ConvertRGBToHSB(pixel);

  float brightness = HueSaturationBrightness.z;
  brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
  brightness = clamp(brightness,0.0f,1.0f);
  HueSaturationBrightness.z = brightness;

  CLPixelType filteredPixel = ConvertHSBToRGB(HueSaturationBrightness);
  filteredPixel.w = pixel.w;
  im[c] = filteredPixel;
}
/*
  ConvertRGBToHSL: converts quantum red/green/blue to hue, saturation and
  lightness, written through the three output pointers.  Hue and saturation
  are 0 when the channels are equal.

  Declared static inline so that no external definition is required when the
  compiler decides not to inline a call.
*/
static inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
  float *hue, float *saturation, float *lightness)
{
  float
    chroma,
    tmax,
    tmin;

  tmax=max(QuantumScale*red,max(QuantumScale*green, QuantumScale*blue));
  tmin=min(QuantumScale*red,min(QuantumScale*green, QuantumScale*blue));

  chroma=tmax-tmin;

  *lightness=(tmax+tmin)/2.0;
  if (chroma <= 0.0)
  {
    *hue=0.0;
    *saturation=0.0;
    return;
  }

  if (tmax == (QuantumScale*red))
  {
    *hue=(QuantumScale*green-QuantumScale*blue)/chroma;
    if ((QuantumScale*green) < (QuantumScale*blue))
      *hue+=6.0;
  }
  else if (tmax == (QuantumScale*green))
  {
    *hue=2.0+(QuantumScale*blue-QuantumScale*red)/chroma;
  }
  else
  {
    *hue=4.0+(QuantumScale*red-QuantumScale*green)/chroma;
  }

  /* Hue was computed in sextants; rescale it to a fraction of a turn. */
  *hue*=60.0/360.0;

  *saturation=(*lightness <= 0.5) ? chroma/(2.0*(*lightness))
                                  : chroma/(2.0-2.0*(*lightness));
}

/*
  ConvertHSLToRGB: converts hue (fraction of a turn), saturation and
  lightness to quantum red/green/blue, written through the output pointers
  after ClampToQuantum.
*/
static inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
  CLQuantum *red,CLQuantum *green,CLQuantum *blue)
{
  float
    b,
    chroma,
    g,
    h,
    tmin,
    r,
    second;

  h=hue*360.0;
  if (lightness <= 0.5)
    chroma=2.0*lightness*saturation;
  else
    chroma=(2.0-2.0*lightness)*saturation;
  tmin=lightness-0.5*chroma;

  /* Reduce the angle to [0,360) and express it in sextants. */
  h-=360.0*floor(h/360.0);
  h/=60.0;
  second=chroma*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));

  switch ((int) floor(h))
  {
    case 0:
    {
      r=tmin+chroma;
      g=tmin+second;
      b=tmin;
      break;
    }
    case 1:
    {
      r=tmin+second;
      g=tmin+chroma;
      b=tmin;
      break;
    }
    case 2:
    {
      r=tmin;
      g=tmin+chroma;
      b=tmin+second;
      break;
    }
    case 3:
    {
      r=tmin;
      g=tmin+second;
      b=tmin+chroma;
      break;
    }
    case 4:
    {
      r=tmin+second;
      g=tmin;
      b=tmin+chroma;
      break;
    }
    case 5:
    {
      r=tmin+chroma;
      g=tmin;
      b=tmin+second;
      break;
    }
    default:
    {
      r=0.0;
      g=0.0;
      b=0.0;
    }
  }

  *red=ClampToQuantum(QuantumRange*r);
  *green=ClampToQuantum(QuantumRange*g);
  *blue=ClampToQuantum(QuantumRange*b);
}

/*
  ModulateHSL: modulates red/green/blue in place.  The hue is shifted by
  0.5*(0.01*percent_hue-1.0) and wrapped into [0,1); saturation and
  lightness are scaled by 0.01 times their percentages.
*/
static inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
  CLQuantum *red,CLQuantum *green,CLQuantum *blue)
{
  float
    hue,
    lightness,
    saturation;

  ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);

  hue+=0.5*(0.01*percent_hue-1.0);
  while (hue < 0.0)
    hue+=1.0;
  while (hue >= 1.0)
    hue-=1.0;

  saturation*=0.01*percent_saturation;
  lightness*=0.01*percent_lightness;

  ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
}

/*
  Modulate: in-place modulation, one work-item per pixel.  The row stride is
  get_global_size(0).  Every colorspace value is currently handled by the
  HSL path.  The w channel is copied through unchanged.
*/
__kernel void Modulate(__global CLPixelType *im,
  const float percent_brightness,
  const float percent_hue,
  const float percent_saturation,
  const int colorspace)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  const int columns = get_global_size(0);
  const int c = x + y * columns;

  CLPixelType pixel = im[c];

  CLQuantum
    blue,
    green,
    red;

  red=getRed(pixel);
  green=getGreen(pixel);
  blue=getBlue(pixel);

  switch (colorspace)
  {
    case HSLColorspace:
    default:
    {
      ModulateHSL(percent_hue, percent_saturation, percent_brightness,
        &red, &green, &blue);
    }
  }

  CLPixelType filteredPixel;

  setRed(&filteredPixel, red);
  setGreen(&filteredPixel, green);
  setBlue(&filteredPixel, blue);
  filteredPixel.w = pixel.w;

  im[c] = filteredPixel;
}
/*
  Grayscale: in-place grayscale conversion, one work-item per pixel.  The
  row stride is get_global_size(0).  `method` selects how the intensity is
  derived from red/green/blue; unknown methods use the Rec709 weights.  The
  w channel is copied through unchanged.  The `colorspace` parameter is not
  referenced in this kernel body.
*/
__kernel void Grayscale(__global CLPixelType *im,
  const int method, const int colorspace)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  const int columns = get_global_size(0);
  const int c = x + y * columns;

  CLPixelType pixel = im[c];

  const float red=(float)getRed(pixel);
  const float green=(float)getGreen(pixel);
  const float blue=(float)getBlue(pixel);

  float intensity=0.0;

  CLPixelType filteredPixel;

  switch (method)
  {
    case AveragePixelIntensityMethod:
      intensity=(red+green+blue)/3.0;
      break;

    case BrightnessPixelIntensityMethod:
      intensity=max(max(red,green),blue);
      break;

    case LightnessPixelIntensityMethod:
      intensity=(min(min(red,green),blue)+
          max(max(red,green),blue))/2.0;
      break;

    case MSPixelIntensityMethod:
      intensity=(float) (((float) red*red+green*green+
          blue*blue)/(3.0*QuantumRange));
      break;

    /* Luma and luminance share the same weights in this kernel. */
    case Rec601LumaPixelIntensityMethod:
    case Rec601LuminancePixelIntensityMethod:
      intensity=0.298839*red+0.586811*green+0.114350*blue;
      break;

    case RMSPixelIntensityMethod:
      intensity=(float) (sqrt((float) red*red+green*green+
          blue*blue)/sqrt(3.0));
      break;

    case Rec709LumaPixelIntensityMethod:
    case Rec709LuminancePixelIntensityMethod:
    default:
      intensity=0.212656*red+0.715158*green+0.072186*blue;
      break;
  }

  setGray(&filteredPixel, ClampToQuantum(intensity));

  filteredPixel.w = pixel.w;

  im[c] = filteredPixel;
}
/* Box filter: every position gets unit weight, so the argument is unused. */
float BoxResizeFilter(const float x)
{
  (void) x;
  return 1.0f;
}
/*
  CubicBC: piecewise cubic evaluated from seven coefficients.
    x < 1      : c[0] + c[1]*x^2 + c[2]*x^3
    1 <= x < 2 : c[3] + c[4]*x + c[5]*x^2 + c[6]*x^3
    otherwise  : 0
*/
float CubicBC(const float x,const __global float* resizeFilterCoefficients)
{
  const __global float *c = resizeFilterCoefficients;
  float weight = 0.0;

  if (x < 1.0)
    weight = c[0]+x*(x*(c[1]+x*c[2]));
  else if (x < 2.0)
    weight = c[3]+x*(c[4]+x*(c[5]+x*c[6]));

  return(weight);
}
/* Sinc: sinpi(x)/(MagickPI*x), with the value 1 at x == 0. */
float Sinc(const float x)
{
  if (x == 0.0f)
    return(1.0f);

  const float alpha=(float) (MagickPI*x);
  return sinpi(x)/alpha;
}
/* Triangle filter: 1-x below 1, and 0 from 1 upwards. */
float Triangle(const float x)
{
  if (x < 1.0f)
    return 1.0f-x;

  return 0.0f;
}
/* Hanning window: 0.5 + 0.5*cos(MagickPI*x). */
float Hanning(const float x)
{
  const float c=cos((MagickPI*x));

  return(0.5f+0.5f*c);
}
/* Hamming window: 0.54 + 0.46*cos(MagickPI*x). */
float Hamming(const float x)
{
  const float c=cos((MagickPI*x));

  return(0.54f+0.46f*c);
}
/* Blackman window: 0.34 + c*(0.5 + 0.16*c), with c = cos(MagickPI*x). */
float Blackman(const float x)
{
  const float c=cos((MagickPI*x));

  return(0.34f+c*(0.5f+c*0.16f));
}
/*
  Identifiers of the resize weighting functions.  The numeric values are
  consecutive, starting at 0 for BoxWeightingFunction.
*/
typedef enum
{
  BoxWeightingFunction = 0,
  TriangleWeightingFunction,
  CubicBCWeightingFunction,
  HanningWeightingFunction,
  HammingWeightingFunction,
  BlackmanWeightingFunction,
  GaussianWeightingFunction,
  QuadraticWeightingFunction,
  JincWeightingFunction,
  SincWeightingFunction,
  SincFastWeightingFunction,
  KaiserWeightingFunction,
  WelshWeightingFunction,
  BohmanWeightingFunction,
  LagrangeWeightingFunction,
  CosineWeightingFunction,
} ResizeWeightingFunctionType;
/*
  applyResizeFilter: evaluates the weighting function selected by filterType
  at x.  filterCoefficients is only used by the cubic filter.  Weighting
  functions without an implementation here evaluate to 0.

  Declared static inline so that no external definition is required when the
  compiler decides not to inline a call.
*/
static inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
{
  switch (filterType)
  {
    case BoxWeightingFunction:
      return BoxResizeFilter(x);
    case TriangleWeightingFunction:
      return Triangle(x);
    case CubicBCWeightingFunction:
      return CubicBC(x,filterCoefficients);
    case HanningWeightingFunction:
      return Hanning(x);
    case HammingWeightingFunction:
      return Hamming(x);
    case BlackmanWeightingFunction:
      return Blackman(x);
    /* Both sinc variants share one implementation. */
    case SincWeightingFunction:
    case SincFastWeightingFunction:
      return Sinc(x);
    default:
      return 0.0f;
  }
}
/*
  getResizeFilterWeight: weight of a sample at distance x.

  The distance is first divided by resizeFilterBlur and made non-negative.
  The result is the filter function at that distance multiplied by the window
  function at the distance scaled by resizeFilterScale.  The window factor is
  1 when resizeWindowSupport is below MagickEpsilon or the window type is the
  box function.

  Declared static inline so that no external definition is required when the
  compiler decides not to inline a call.
*/
static inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
  , const ResizeWeightingFunctionType resizeWindowType
  , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
{
  const float xBlur = fabs(x/resizeFilterBlur);
  float windowWeight;

  if (resizeWindowSupport < MagickEpsilon
      || resizeWindowType == BoxWeightingFunction)
  {
    windowWeight = 1.0f;
  }
  else
  {
    windowWeight = applyResizeFilter(xBlur*resizeFilterScale, resizeWindowType,
      resizeFilterCubicCoefficients);
  }

  return windowWeight * applyResizeFilter(xBlur, resizeFilterType,
    resizeFilterCubicCoefficients);
}
/*
  getNumWorkItemsPerPixel: number of work-items that share one output pixel,
  i.e. numWorkItems / pixelPerWorkgroup (integer division).
  pixelPerWorkgroup must be non-zero.

  Declared static inline: a plain `inline` definition does not provide an
  external definition, so a call that the compiler chooses not to inline
  would be left unresolved.
*/
static inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems)
{
  return (numWorkItems/pixelPerWorkgroup);
}

/*
  pixelToCompute: index of the pixel that work-item itemID contributes to,
  or -1 when the work-item lies beyond the last pixel's share.  Work-items
  are assigned to pixels in consecutive runs of
  getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems) items, so
  pixelPerWorkgroup must be non-zero and must not exceed numWorkItems.
*/
static inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems)
{
  const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
  int pixelIndex = itemID/numWorkItemsPerPixel;
  pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
  return pixelIndex;
}
/*
  ResizeHorizontalFilter: horizontal resampling pass.

  A work-group produces up to pixelPerWorkgroup consecutive output pixels of
  row get_global_id(1).  The input pixels it needs are first copied into the
  __local buffer inputImageCache (at most numCachedPixels of them).  The
  output pixels are then processed in chunks of pixelChunkSize: the
  work-items are split evenly between the pixels of a chunk, each work-item
  accumulates a partial weighted sum for its pixel, and the partial sums are
  combined one work-item slot at a time in outputPixelCache / densityCache /
  gammaCache with a barrier between slots.

  With matte == 0 all four channels are weighted directly and normalised by
  the summed weight.  Otherwise x/y/z are additionally weighted by
  QuantumScale*GetPixelAlpha(p) and divided by the summed alpha weight.

  inputRows and filteredRows are not referenced in this kernel body.
*/
__kernel __attribute__((reqd_work_group_size(256, 1, 1)))
void ResizeHorizontalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
  , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
  , const int resizeFilterType, const int resizeWindowType
  , const __global float* resizeFilterCubicCoefficients
  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache)
{
  /* Output columns handled by this work-group: [startX, stopX). */
  const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
  const unsigned int stopX = min(startX + pixelPerWorkgroup,filteredColumns);
  const unsigned int actualNumPixelToCompute = stopX - startX;

  float scale = max(1.0f/xFactor+MagickEpsilon ,1.0f);
  const float support = max(scale*resizeFilterSupport,0.5f);
  scale = PerceptibleReciprocal(scale);

  /* Input columns cached in local memory: [cacheRangeStartX, cacheRangeEndX). */
  const int cacheRangeStartX = max((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
  const int cacheRangeEndX = min((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);

  const unsigned int y = get_global_id(1);
  event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
  wait_group_events(1,&e);

  const ResizeWeightingFunctionType filterType = (ResizeWeightingFunctionType)resizeFilterType;
  const ResizeWeightingFunctionType windowType = (ResizeWeightingFunctionType)resizeWindowType;

  unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
  for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
  {
    const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
    const unsigned int chunkStopX = min(chunkStartX + pixelChunkSize, stopX);
    const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;

    /* Work-item to pixel assignment for this chunk. */
    const unsigned int itemID = get_local_id(0);
    const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
    const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));

    /* Partial sums of this work-item. */
    float4 partialPixel = (float4)0.0f;
    float partialDensity = 0.0f;
    float partialGamma = 0.0f;

    if (pixelIndex != -1)
    {
      const int x = chunkStartX + pixelIndex;

      /* Input range [start, stop) contributing to output column x. */
      const float bisect = (x+0.5)/xFactor+MagickEpsilon;
      const unsigned int start = (unsigned int)max(bisect-support+0.5f,0.0f);
      const unsigned int stop = (unsigned int)min(bisect+support+0.5f,(float)inputColumns);
      const unsigned int n = stop - start;

      /* Split the n taps between the work-items of this pixel, rounding up. */
      unsigned int numStepsPerWorkItem = n / numItems;
      numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);

      const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
      if (startStep < n)
      {
        const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);

        unsigned int cacheIndex = start+startStep-cacheRangeStartX;
        if (matte == 0)
        {
          for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++)
          {
            float4 cp = convert_float4(inputImageCache[cacheIndex]);

            float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,filterType
              , windowType
              , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));

            partialPixel += ((float4)weight)*cp;
            partialDensity+=weight;
          }
        }
        else
        {
          for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++)
          {
            CLPixelType p = inputImageCache[cacheIndex];

            float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,filterType
              , windowType
              , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));

            float alpha = weight * QuantumScale * GetPixelAlpha(p);
            float4 cp = convert_float4(p);

            partialPixel.x += alpha * cp.x;
            partialPixel.y += alpha * cp.y;
            partialPixel.z += alpha * cp.z;
            partialPixel.w += weight * cp.w;

            partialDensity+=weight;
            partialGamma+=alpha;
          }
        }
      }
    }

    /* Clear the per-pixel accumulators of this chunk. */
    if (itemID < actualNumPixelInThisChunk)
    {
      outputPixelCache[itemID] = (float4)0.0f;
      densityCache[itemID] = 0.0f;
      if (matte != 0)
        gammaCache[itemID] = 0.0f;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    /*
      Add the partial sums one slot at a time so that only one work-item
      updates a given pixel between two barriers.
    */
    for (unsigned int i = 0; i < numItems; i++)
    {
      if (pixelIndex != -1 && itemID%numItems == i)
      {
        outputPixelCache[pixelIndex]+=partialPixel;
        densityCache[pixelIndex]+=partialDensity;
        if (matte!=0)
        {
          gammaCache[pixelIndex]+=partialGamma;
        }
      }
      barrier(CLK_LOCAL_MEM_FENCE);
    }

    /* The first actualNumPixelInThisChunk work-items write one pixel each. */
    if (itemID < actualNumPixelInThisChunk)
    {
      if (matte==0)
      {
        float totalDensity = densityCache[itemID];
        float4 sum = outputPixelCache[itemID];
        if (totalDensity!= 0.0f && totalDensity != 1.0)
        {
          totalDensity = PerceptibleReciprocal(totalDensity);
          sum *= (float4)totalDensity;
        }
        filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(sum.x)
          , ClampToQuantum(sum.y)
          , ClampToQuantum(sum.z)
          , ClampToQuantum(sum.w));
      }
      else
      {
        float totalDensity = densityCache[itemID];
        float totalGamma = gammaCache[itemID];
        float4 sum = outputPixelCache[itemID];

        if (totalDensity!= 0.0f && totalDensity != 1.0)
        {
          totalDensity = PerceptibleReciprocal(totalDensity);
          sum *= (float4)totalDensity;
          totalGamma *= totalDensity;
        }
        totalGamma = PerceptibleReciprocal(totalGamma);

        CLPixelType fp;
        fp = (CLPixelType) ( ClampToQuantum(totalGamma*sum.x)
          , ClampToQuantum(totalGamma*sum.y)
          , ClampToQuantum(totalGamma*sum.z)
          , ClampToQuantum(sum.w));

        filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
      }
    }
  }
}
/*
  ResizeHorizontalFilterSinc: horizontal resampling pass with the filter and
  window functions both fixed to SincWeightingFunction.  All other arguments
  are forwarded unchanged to ResizeHorizontalFilter; the resizeFilterType and
  resizeWindowType values received here are not used.
*/
__kernel __attribute__((reqd_work_group_size(256, 1, 1)))
void ResizeHorizontalFilterSinc(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
  , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
  , const int resizeFilterType, const int resizeWindowType
  , const __global float* resizeFilterCubicCoefficients
  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache)
{
  ResizeHorizontalFilter(
    inputImage, inputColumns, inputRows, matte,
    xFactor,
    filteredImage, filteredColumns, filteredRows,
    SincWeightingFunction, SincWeightingFunction,
    resizeFilterCubicCoefficients,
    resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur,
    inputImageCache, numCachedPixels, pixelPerWorkgroup, pixelChunkSize,
    outputPixelCache, densityCache, gammaCache);
}
/*
  ResizeVerticalFilter: vertical resampling pass.

  A work-group produces up to pixelPerWorkgroup consecutive output pixels of
  column get_global_id(0).  The input pixels it needs are first copied, with
  a stride of inputColumns, into the __local buffer inputImageCache (at most
  numCachedPixels of them).  The output pixels are then processed in chunks
  of pixelChunkSize: the work-items are split evenly between the pixels of a
  chunk, each work-item accumulates a partial weighted sum for its pixel, and
  the partial sums are combined one work-item slot at a time in
  outputPixelCache / densityCache / gammaCache with a barrier between slots.

  With matte == 0 all four channels are weighted directly and normalised by
  the summed weight.  Otherwise x/y/z are additionally weighted by
  QuantumScale*GetPixelAlpha(p) and divided by the summed alpha weight.

  filteredRows is only used to bound the rows handled by the work-group.
*/
__kernel __attribute__((reqd_work_group_size(1, 256, 1)))
void ResizeVerticalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
  , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
  , const int resizeFilterType, const int resizeWindowType
  , const __global float* resizeFilterCubicCoefficients
  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache)
{
  /* Output rows handled by this work-group: [startY, stopY). */
  const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
  const unsigned int stopY = min(startY + pixelPerWorkgroup,filteredRows);
  const unsigned int actualNumPixelToCompute = stopY - startY;

  float scale = max(1.0f/yFactor+MagickEpsilon ,1.0f);
  const float support = max(scale*resizeFilterSupport,0.5f);
  scale = PerceptibleReciprocal(scale);

  /* Input rows cached in local memory: [cacheRangeStartY, cacheRangeEndY). */
  const int cacheRangeStartY = max((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
  const int cacheRangeEndY = min((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);

  const unsigned int x = get_global_id(0);
  event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
  wait_group_events(1,&e);

  const ResizeWeightingFunctionType filterType = (ResizeWeightingFunctionType)resizeFilterType;
  const ResizeWeightingFunctionType windowType = (ResizeWeightingFunctionType)resizeWindowType;

  unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
  for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
  {
    const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
    const unsigned int chunkStopY = min(chunkStartY + pixelChunkSize, stopY);
    const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;

    /* Work-item to pixel assignment for this chunk. */
    const unsigned int itemID = get_local_id(1);
    const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
    const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));

    /* Partial sums of this work-item. */
    float4 partialPixel = (float4)0.0f;
    float partialDensity = 0.0f;
    float partialGamma = 0.0f;

    if (pixelIndex != -1)
    {
      const int y = chunkStartY + pixelIndex;

      /* Input range [start, stop) contributing to output row y. */
      const float bisect = (y+0.5)/yFactor+MagickEpsilon;
      const unsigned int start = (unsigned int)max(bisect-support+0.5f,0.0f);
      const unsigned int stop = (unsigned int)min(bisect+support+0.5f,(float)inputRows);
      const unsigned int n = stop - start;

      /* Split the n taps between the work-items of this pixel, rounding up. */
      unsigned int numStepsPerWorkItem = n / numItems;
      numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);

      const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
      if (startStep < n)
      {
        const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);

        unsigned int cacheIndex = start+startStep-cacheRangeStartY;
        if (matte == 0)
        {
          for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++)
          {
            float4 cp = convert_float4(inputImageCache[cacheIndex]);

            float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,filterType
              , windowType
              , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));

            partialPixel += ((float4)weight)*cp;
            partialDensity+=weight;
          }
        }
        else
        {
          for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++)
          {
            CLPixelType p = inputImageCache[cacheIndex];

            float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,filterType
              , windowType
              , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));

            float alpha = weight * QuantumScale * GetPixelAlpha(p);
            float4 cp = convert_float4(p);

            partialPixel.x += alpha * cp.x;
            partialPixel.y += alpha * cp.y;
            partialPixel.z += alpha * cp.z;
            partialPixel.w += weight * cp.w;

            partialDensity+=weight;
            partialGamma+=alpha;
          }
        }
      }
    }

    /* Clear the per-pixel accumulators of this chunk. */
    if (itemID < actualNumPixelInThisChunk)
    {
      outputPixelCache[itemID] = (float4)0.0f;
      densityCache[itemID] = 0.0f;
      if (matte != 0)
        gammaCache[itemID] = 0.0f;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    /*
      Add the partial sums one slot at a time so that only one work-item
      updates a given pixel between two barriers.
    */
    for (unsigned int i = 0; i < numItems; i++)
    {
      if (pixelIndex != -1 && itemID%numItems == i)
      {
        outputPixelCache[pixelIndex]+=partialPixel;
        densityCache[pixelIndex]+=partialDensity;
        if (matte!=0)
        {
          gammaCache[pixelIndex]+=partialGamma;
        }
      }
      barrier(CLK_LOCAL_MEM_FENCE);
    }

    /* The first actualNumPixelInThisChunk work-items write one pixel each. */
    if (itemID < actualNumPixelInThisChunk)
    {
      if (matte==0)
      {
        float totalDensity = densityCache[itemID];
        float4 sum = outputPixelCache[itemID];
        if (totalDensity!= 0.0f && totalDensity != 1.0)
        {
          totalDensity = PerceptibleReciprocal(totalDensity);
          sum *= (float4)totalDensity;
        }
        filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(sum.x)
          , ClampToQuantum(sum.y)
          , ClampToQuantum(sum.z)
          , ClampToQuantum(sum.w));
      }
      else
      {
        float totalDensity = densityCache[itemID];
        float totalGamma = gammaCache[itemID];
        float4 sum = outputPixelCache[itemID];

        if (totalDensity!= 0.0f && totalDensity != 1.0)
        {
          totalDensity = PerceptibleReciprocal(totalDensity);
          sum *= (float4)totalDensity;
          totalGamma *= totalDensity;
        }
        totalGamma = PerceptibleReciprocal(totalGamma);

        CLPixelType fp;
        fp = (CLPixelType) ( ClampToQuantum(totalGamma*sum.x)
          , ClampToQuantum(totalGamma*sum.y)
          , ClampToQuantum(totalGamma*sum.z)
          , ClampToQuantum(sum.w));

        filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
      }
    }
  }
}
/*
  ResizeVerticalFilterSinc: vertical resampling pass with the filter and
  window functions both fixed to SincWeightingFunction.  All other arguments
  are forwarded unchanged to ResizeVerticalFilter; the resizeFilterType and
  resizeWindowType values received here are not used.
*/
__kernel __attribute__((reqd_work_group_size(1, 256, 1)))
void ResizeVerticalFilterSinc(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
  , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
  , const int resizeFilterType, const int resizeWindowType
  , const __global float* resizeFilterCubicCoefficients
  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache)
{
  ResizeVerticalFilter(
    inputImage, inputColumns, inputRows, matte,
    yFactor,
    filteredImage, filteredColumns, filteredRows,
    SincWeightingFunction, SincWeightingFunction,
    resizeFilterCubicCoefficients,
    resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur,
    inputImageCache, numCachedPixels, pixelPerWorkgroup, pixelChunkSize,
    outputPixelCache, densityCache, gammaCache);
}
/*
 * Per-noise-type scale factors used by mwcGenerateDifferentialNoise().
 *
 * Each macro expands to an expression that multiplies a variable named
 * `attenuate` by a fixed constant (SigmaRandom is `attenuate` itself), so a
 * variable with exactly that name must be in scope wherever a macro is used.
 */
#define SigmaUniform (attenuate*0.015625f)
#define SigmaGaussian (attenuate*0.015625f)
#define SigmaImpulse (attenuate*0.1f)
#define SigmaLaplacian (attenuate*0.0390625f)
#define SigmaMultiplicativeGaussian (attenuate*0.5f)
#define SigmaPoisson (attenuate*12.5f)
#define SigmaRandom (attenuate)
#define TauGaussian (attenuate*0.078125f)
/*
 * Returns (a + b) reduced by M once when the sum reaches M or appears to
 * have wrapped.
 *
 * NOTE(review): the wrap test compares float conversions of v and a rather
 * than the integers themselves; this presumably stands in for the integer
 * carry test (v < a).  It is kept exactly as found -- confirm before
 * changing, since the generated random stream depends on it.
 */
ulong MWC_AddMod64(ulong a, ulong b, ulong M)
{
  ulong v = a + b;
  if ((v >= M) || (convert_float(v) < convert_float(a)))
    v = v - M;
  return v;
}

/* Modular multiplication (a * b mod M) by shift-and-add over the bits of a. */
ulong MWC_MulMod64(ulong a, ulong b, ulong M)
{
  ulong r = 0;
  while (a != 0)
  {
    if (a & 1)
      r = MWC_AddMod64(r, b, M);
    b = MWC_AddMod64(b, b, M);
    a = a >> 1;
  }
  return r;
}

/* Modular exponentiation (a^e mod M) by square-and-multiply over the bits of e. */
ulong MWC_PowMod64(ulong a, ulong e, ulong M)
{
  ulong sqr = a, acc = 1;
  while (e != 0)
  {
    if (e & 1)
      acc = MWC_MulMod64(acc, sqr, M);
    sqr = MWC_MulMod64(sqr, sqr, M);
    e = e >> 1;
  }
  return acc;
}

/*
 * Advances the generator state `curr` (x, c) by `distance` steps in one go:
 * the state is packed as x*A + c, multiplied by A^distance mod M, and
 * unpacked again with division / remainder by A.
 */
uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
{
  ulong m = MWC_PowMod64(A, distance, M);
  ulong x = curr.x * (ulong)A + curr.y;
  x = MWC_MulMod64(x, m, M);
  return (uint2)((uint)(x / A), (uint)(x % A));
}

/*
 * Computes the starting state for the calling work-item's stream.
 *
 * The stream position is
 *   streamBase + (get_global_id(0)*vecSize + vecOffset) * streamGap
 * steps away from the fixed base state MWC_BASEID.  The result depends only
 * on these inputs; no previous generator state is consulted.
 */
uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
{
  enum{ MWC_BASEID = 4077358422479273989UL };

  ulong dist = streamBase + (get_global_id(0) * vecSize + vecOffset) * streamGap;
  ulong m = MWC_PowMod64(A, dist, M);
  ulong x = MWC_MulMod64(MWC_BASEID, m, M);
  return (uint2)((uint)(x / A), (uint)(x % A));
}

/* Generator state: x is the current value word, c the carry word. */
typedef struct{ uint x; uint c; } mwc64x_state_t;

enum{ MWC64X_A = 4294883355U };
enum{ MWC64X_M = 18446383549859758079UL };

/* Advances the state by one step: (x, c) <- (low, high) words of A*x + c. */
void MWC64X_Step(mwc64x_state_t *s)
{
  uint X = s->x, C = s->c;

  uint Xn = MWC64X_A * X + C;
  uint carry = (uint)(Xn < C);            /* 1 when the low-word addition wrapped */
  uint Cn = mad_hi(MWC64X_A, X, carry);

  s->x = Xn;
  s->c = Cn;
}

/* Advances the state by `distance` steps without generating the values in between. */
void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
{
  uint2 tmp = MWC_SkipImpl_Mod64((uint2)(s->x, s->c), MWC64X_A, MWC64X_M, distance);
  s->x = tmp.x;
  s->c = tmp.y;
}

/*
 * Initialises *s for the calling work-item.  Both fields of *s are
 * overwritten; whatever they held before the call is discarded.
 */
void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
{
  uint2 tmp = MWC_SeedImpl_Mod64(MWC64X_A, MWC64X_M, 1, 0, baseOffset, perStreamOffset);
  s->x = tmp.x;
  s->c = tmp.y;
}

/* Returns x ^ c of the current state, then steps the generator. */
uint MWC64X_NextUint(mwc64x_state_t *s)
{
  uint res = s->x ^ s->c;
  MWC64X_Step(s);
  return res;
}

typedef enum
{
  UndefinedNoise,
  UniformNoise,
  GaussianNoise,
  MultiplicativeGaussianNoise,
  ImpulseNoise,
  LaplacianNoise,
  PoissonNoise,
  RandomNoise
} NoiseType;

/* Draws the next 32-bit value and scales it by 1/0xffffffff into a float. */
float mwcReadPseudoRandomValue(mwc64x_state_t* rng)
{
  return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff);
}

/*
 * Returns `pixel` perturbed by noise of the requested type.
 *
 * r          - generator state; one value is always consumed, and the
 *              Gaussian, multiplicative Gaussian and Poisson cases consume
 *              more.
 * pixel      - the channel value to perturb.
 * noise_type - selects the distribution; unknown values are treated like
 *              UniformNoise.
 * attenuate  - strength; read through the Sigma* / TauGaussian macros, so
 *              this parameter name must not change.
 *
 * The result is not clamped here.
 */
float mwcGenerateDifferentialNoise(mwc64x_state_t* r, CLQuantum pixel, NoiseType noise_type, float attenuate)
{
  float
    alpha,
    beta,
    noise,
    sigma;

  noise = 0.0f;
  alpha = mwcReadPseudoRandomValue(r);
  switch (noise_type)
  {
    case UniformNoise:
    default:
    {
      noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
      break;
    }
    case GaussianNoise:
    {
      float
        gamma,
        tau;

      /* log(0) is avoided by substituting 1, which yields gamma == 0. */
      if (alpha == 0.0f)
        alpha=1.0f;
      beta=mwcReadPseudoRandomValue(r);
      gamma=sqrt(-2.0f*log(alpha));
      sigma=gamma*cospi((2.0f*beta));
      tau=gamma*sinpi((2.0f*beta));
      noise=(float)(pixel+sqrt((float) pixel)*SigmaGaussian*sigma+
        QuantumRange*TauGaussian*tau);
      break;
    }
    case ImpulseNoise:
    {
      if (alpha < (SigmaImpulse/2.0f))
        noise=0.0f;
      else if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
        noise=(float)QuantumRange;
      else
        noise=(float)pixel;
      break;
    }
    case LaplacianNoise:
    {
      if (alpha <= 0.5f)
      {
        if (alpha <= MagickEpsilon)
          noise=(float) (pixel-QuantumRange);
        else
          noise=(float) (pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
            0.5f);
        break;
      }
      beta=1.0f-alpha;
      if (beta <= (0.5f*MagickEpsilon))
        noise=(float) (pixel+QuantumRange);
      else
        noise=(float) (pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
      break;
    }
    case MultiplicativeGaussianNoise:
    {
      sigma=1.0f;
      if (alpha > MagickEpsilon)
        sigma=sqrt(-2.0f*log(alpha));
      beta=mwcReadPseudoRandomValue(r);
      noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma*
        cospi((float) (2.0f*beta))/2.0f);
      break;
    }
    case PoissonNoise:
    {
      float
        poisson;
      unsigned int
        i;

      poisson=exp(-SigmaPoisson*QuantumScale*pixel);
      /* Keeps multiplying uniform draws until the product drops to `poisson`. */
      for (i=0; alpha > poisson; i++)
      {
        beta=mwcReadPseudoRandomValue(r);
        alpha*=beta;
      }
      noise=(float) (QuantumRange*i/SigmaPoisson);
      break;
    }
    case RandomNoise:
    {
      noise=(float) (QuantumRange*SigmaRandom*alpha);
      break;
    }
  }
  return noise;
}

/*
 * Adds noise to the selected channels of a flat pixel array.
 *
 * inputImage / filteredImage - source and destination, inputPixelCount pixels.
 * pixelsPerWorkItem          - how many pixels each work-item handles; a
 *                              work-item's pixels are get_local_size(0) apart.
 * channel                    - bit mask of channels to perturb.
 * noise_type, attenuate      - forwarded to mwcGenerateDifferentialNoise().
 * seed0, seed1               - low and high words of a 64-bit seed.
 * numRandomNumbersPerPixel   - used to size each work-item's stream span.
 *
 * Previously the seeds were written into the generator state and then
 * immediately overwritten by MWC64X_SeedStreams(), so they had no effect on
 * the output.  The seed is now folded into the stream base offset.  With
 * seed0 == seed1 == 0 the generated values are the same as before.
 */
__kernel
void GenerateNoiseImage(const __global CLPixelType* inputImage, __global CLPixelType* filteredImage
                    ,const unsigned int inputPixelCount, const unsigned int pixelsPerWorkItem
                    ,const ChannelType channel
                    ,const NoiseType noise_type, const float attenuate
                    ,const unsigned int seed0, const unsigned int seed1
                    ,const unsigned int numRandomNumbersPerPixel)
{
  mwc64x_state_t rng;

  /* Length of the random sub-stream reserved for each work-item. */
  uint span = pixelsPerWorkItem * numRandomNumbersPerPixel;
  /* Offset of this work-group's sub-streams, before the seed is applied. */
  uint offset = span * get_local_size(0) * get_group_id(0);
  /* The caller's seed selects where in the master stream this launch starts. */
  const ulong seedBase = ((ulong)seed1 << 32) | (ulong)seed0;

  MWC64X_SeedStreams(&rng, seedBase + offset, span);

  uint pos = get_local_size(0) * get_group_id(0) * pixelsPerWorkItem + get_local_id(0);
  uint count = pixelsPerWorkItem;

  while (count > 0)
  {
    if (pos < inputPixelCount)
    {
      CLPixelType p = inputImage[pos];

      if ((channel&RedChannel)!=0)
      {
        setRed(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getRed(p),noise_type,attenuate)));
      }
      if ((channel&GreenChannel)!=0)
      {
        setGreen(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getGreen(p),noise_type,attenuate)));
      }
      if ((channel&BlueChannel)!=0)
      {
        setBlue(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getBlue(p),noise_type,attenuate)));
      }
      if ((channel & OpacityChannel) != 0)
      {
        setOpacity(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getOpacity(p),noise_type,attenuate)));
      }

      filteredImage[pos] = p;
    }
    pos += get_local_size(0);
    --count;
  }
}
/*
 * Motion blur: each output pixel is `bias` plus a weighted sum of `width`
 * input samples taken at currentPixel + offset[i], with sample coordinates
 * clamped to the canvas via ClampToCanvas().
 *
 * When the opacity channel is selected and `matte` is non-zero, each tap's
 * weight is filter[i] scaled by alpha = QuantumScale*(QuantumRange - w), and
 * the x/y/z sums are finally multiplied by PerceptibleReciprocal of the
 * accumulated weight.  Otherwise the plain filter[i] weight is used for all
 * four components.
 *
 * NOTE(review): in the alpha-weighted path the w component is accumulated
 * with the same alpha-scaled weight as the colour components -- confirm
 * that this is intended.
 */
__kernel void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
                         const unsigned int imageWidth, const unsigned int imageHeight,
                         const __global float *filter, const unsigned int width,
                         const __global int2* offset, const float4 bias,
                         const ChannelType channel, const unsigned int matte)
{
  int2 currentPixel;
  currentPixel.x = get_global_id(0);
  currentPixel.y = get_global_id(1);

  /* Work-items outside the image have nothing to do. */
  if (currentPixel.x >= imageWidth || currentPixel.y >= imageHeight)
    return;

  const bool alphaWeighted = ((channel & OpacityChannel) != 0) && (matte != 0);

  float4 pixel = bias;
  float gamma = 0.0f;   /* sum of tap weights; only used when alphaWeighted */

  for (int i = 0; i < width; i++)
  {
    int2 samplePixel = currentPixel + offset[i];
    samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
    samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
    const CLPixelType tap = input[samplePixel.y * imageWidth + samplePixel.x];

    float weight = filter[i];
    if (alphaWeighted)
    {
      const float alpha = QuantumScale*(QuantumRange-tap.w);
      weight = weight * alpha;
      gamma += weight;
    }

    pixel.x += weight * (float)tap.x;
    pixel.y += weight * (float)tap.y;
    pixel.z += weight * (float)tap.z;
    pixel.w += weight * (float)tap.w;
  }

  if (alphaWeighted)
  {
    gamma = PerceptibleReciprocal(gamma);
    pixel.xyz = gamma*pixel.xyz;
  }

  CLPixelType outputPixel;
  outputPixel.x = ClampToQuantum(pixel.x);
  outputPixel.y = ClampToQuantum(pixel.y);
  outputPixel.z = ClampToQuantum(pixel.z);
  outputPixel.w = ClampToQuantum(pixel.w);
  output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
}
/*
 * Composite operators.  The enumerators carry no explicit values, so each
 * one's value is its position in this list (starting at 0); the order must
 * therefore not be changed.
 */
typedef enum
{
  UndefinedCompositeOp,          /* 0 */
  NoCompositeOp,
  ModulusAddCompositeOp,
  AtopCompositeOp,
  BlendCompositeOp,
  BumpmapCompositeOp,
  ChangeMaskCompositeOp,
  ClearCompositeOp,
  ColorBurnCompositeOp,
  ColorDodgeCompositeOp,
  ColorizeCompositeOp,           /* 10 */
  CopyBlackCompositeOp,
  CopyBlueCompositeOp,
  CopyCompositeOp,
  CopyCyanCompositeOp,
  CopyGreenCompositeOp,
  CopyMagentaCompositeOp,
  CopyOpacityCompositeOp,
  CopyRedCompositeOp,
  CopyYellowCompositeOp,
  DarkenCompositeOp,             /* 20 */
  DstAtopCompositeOp,
  DstCompositeOp,
  DstInCompositeOp,
  DstOutCompositeOp,
  DstOverCompositeOp,
  DifferenceCompositeOp,
  DisplaceCompositeOp,
  DissolveCompositeOp,
  ExclusionCompositeOp,
  HardLightCompositeOp,          /* 30 */
  HueCompositeOp,
  InCompositeOp,
  LightenCompositeOp,
  LinearLightCompositeOp,
  LuminizeCompositeOp,
  MinusDstCompositeOp,
  ModulateCompositeOp,
  MultiplyCompositeOp,
  OutCompositeOp,
  OverCompositeOp,               /* 40 */
  OverlayCompositeOp,
  PlusCompositeOp,
  ReplaceCompositeOp,
  SaturateCompositeOp,
  ScreenCompositeOp,
  SoftLightCompositeOp,
  SrcAtopCompositeOp,
  SrcCompositeOp,
  SrcInCompositeOp,
  SrcOutCompositeOp,             /* 50 */
  SrcOverCompositeOp,
  ModulusSubtractCompositeOp,
  ThresholdCompositeOp,
  XorCompositeOp,
  DivideDstCompositeOp,
  DistortCompositeOp,
  BlurCompositeOp,
  PegtopLightCompositeOp,
  VividLightCompositeOp,
  PinLightCompositeOp,           /* 60 */
  LinearDodgeCompositeOp,
  LinearBurnCompositeOp,
  MathematicsCompositeOp,
  DivideSrcCompositeOp,
  MinusSrcCompositeOp,
  DarkenIntensityCompositeOp,
  LightenIntensityCompositeOp    /* 67 */
} CompositeOperator;
/*
 * Colour-dodge blend of one channel.
 *
 * Sca, Dca - source / destination channel values already multiplied by
 *            their alpha (as passed by CompositeColorDodge()).
 * Sa, Da   - source / destination alpha.
 *
 * All constants are single precision (1.0f): the previous double literals
 * kept the arithmetic in float only because the program is built with
 * -cl-single-precision-constant.
 */
inline float ColorDodge(const float Sca,
  const float Sa,const float Dca,const float Da)
{
  /* Saturated case; this also covers every input for which Sa-Sca below
     would not be positive, provided the arguments are non-negative. */
  if ((Sca*Da+Dca*Sa) >= Sa*Da)
    return(Sa*Da+Sca*(1.0f-Da)+Dca*(1.0f-Sa));
  return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0f-Da)+Dca*(1.0f-Sa));
}

/*
 * Colour-dodge composite of source pixel *p over destination pixel *q.
 * Writes opacity, red, green and blue of *composite.
 */
inline void CompositeColorDodge(const float4 *p,
  const float4 *q,float4 *composite)
{
  float
    Da,
    gamma,
    Sa;

  Sa=1.0f-QuantumScale*getOpacityF4(*p);  /* opacity -> alpha */
  Da=1.0f-QuantumScale*getOpacityF4(*q);
  gamma=RoundToUnity(Sa+Da-Sa*Da);
  setOpacityF4(composite, QuantumRange*(1.0f-gamma));
  /* Guard the division against a (near-)zero resulting alpha. */
  gamma=QuantumRange/(fabs(gamma) < MagickEpsilon ? MagickEpsilon : gamma);
  setRedF4(composite,gamma*ColorDodge(QuantumScale*getRedF4(*p)*Sa,Sa,QuantumScale*
    getRedF4(*q)*Da,Da));
  setGreenF4(composite,gamma*ColorDodge(QuantumScale*getGreenF4(*p)*Sa,Sa,QuantumScale*
    getGreenF4(*q)*Da,Da));
  setBlueF4(composite,gamma*ColorDodge(QuantumScale*getBlueF4(*p)*Sa,Sa,QuantumScale*
    getBlueF4(*q)*Da,Da));
}
/*
 * "Plus" composite: adds source *p (opacity `alpha`) and destination *q
 * (opacity `beta`), each weighted by its own alpha, and normalises by the
 * combined alpha.  Writes opacity, red, green and blue of *composite.
 *
 * Constants are single precision (1.0f): the previous double literals kept
 * the arithmetic in float only because the program is built with
 * -cl-single-precision-constant.
 */
inline void MagickPixelCompositePlus(const float4 *p,
  const float alpha,const float4 *q,
  const float beta,float4 *composite)
{
  float
    gamma;

  float
    Da,
    Sa;

  Sa=1.0f-QuantumScale*alpha;  /* opacity -> alpha */
  Da=1.0f-QuantumScale*beta;
  gamma=RoundToUnity(Sa+Da);
  setOpacityF4(composite,(float) QuantumRange*(1.0f-gamma));
  gamma=PerceptibleReciprocal(gamma);
  setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q)));
  setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q)));
  setBlueF4(composite,gamma*(Sa*getBlueF4(*p)+Da*getBlueF4(*q)));
}
/*
 * Blend composite: scales each pixel's alpha (QuantumRange - opacity) by its
 * blend factor -- `alpha` for source *p, `beta` for destination *q --
 * converts the result back to an opacity, and hands both pixels to
 * MagickPixelCompositePlus().
 */
inline void MagickPixelCompositeBlend(const float4 *p,
  const float alpha,const float4 *q,
  const float beta,float4 *composite)
{
  const float sourceOpacity =
    (float) (QuantumRange-alpha*(QuantumRange-getOpacityF4(*p)));
  const float destinationOpacity =
    (float) (QuantumRange-beta*(QuantumRange-getOpacityF4(*q)));

  MagickPixelCompositePlus(p,sourceOpacity,q,destinationOpacity,composite);
}
/*
 * Composites `compositeImage` onto `image` in place, one work-item per pixel.
 *
 * image, imageWidth, imageHeight   - destination, read and then overwritten.
 * compositeImage, compositeWidth,
 * compositeHeight                  - source image and its dimensions.
 * compose                          - a CompositeOperator value; only
 *                                    ColorDodgeCompositeOp and
 *                                    BlendCompositeOp are handled, any other
 *                                    value leaves the colour of the
 *                                    destination pixel as it was read.
 * channel                          - not used by this kernel.
 * matte                            - when zero both pixels are treated as
 *                                    having opacity 0.
 * destination_dissolve,
 * source_dissolve                  - blend factors for BlendCompositeOp.
 *
 * The source pixel is addressed with the composite image's own row stride
 * (compositeWidth).  The kernel previously used imageWidth, which is only
 * correct when both images have the same width.  Work-items that fall
 * outside the composite image return without touching the destination.
 * When both images have the same dimensions the behaviour is unchanged.
 */
__kernel
void Composite(__global CLPixelType *image,
               const unsigned int imageWidth,
               const unsigned int imageHeight,
               const __global CLPixelType *compositeImage,
               const unsigned int compositeWidth,
               const unsigned int compositeHeight,
               const unsigned int compose,
               const ChannelType channel,
               const unsigned int matte,
               const float destination_dissolve,
               const float source_dissolve)
{
  uint2 index;
  index.x = get_global_id(0);
  index.y = get_global_id(1);

  if (index.x >= imageWidth
    || index.y >= imageHeight) {
      return;
  }
  /* No source pixel exists here; reading one would be out of bounds. */
  if (index.x >= compositeWidth
    || index.y >= compositeHeight) {
      return;
  }

  const CLPixelType inputPixel = image[index.y*imageWidth+index.x];
  float4 destination;
  setRedF4(&destination,getRed(inputPixel));
  setGreenF4(&destination,getGreen(inputPixel));
  setBlueF4(&destination,getBlue(inputPixel));

  const CLPixelType compositePixel
    = compositeImage[index.y*compositeWidth+index.x];
  float4 source;
  setRedF4(&source,getRed(compositePixel));
  setGreenF4(&source,getGreen(compositePixel));
  setBlueF4(&source,getBlue(compositePixel));

  if (matte != 0) {
    setOpacityF4(&destination,getOpacity(inputPixel));
    setOpacityF4(&source,getOpacity(compositePixel));
  }
  else {
    setOpacityF4(&destination,0.0f);
    setOpacityF4(&source,0.0f);
  }

  float4 composite=destination;

  CompositeOperator op = (CompositeOperator)compose;
  switch (op) {
  case ColorDodgeCompositeOp:
    CompositeColorDodge(&source,&destination,&composite);
    break;
  case BlendCompositeOp:
    MagickPixelCompositeBlend(&source,source_dissolve,&destination,
      destination_dissolve,&composite);
    break;
  default:
    /* Unsupported operator: the destination pixel is written back as read. */
    break;
  }

  CLPixelType outputPixel;
  setRed(&outputPixel, ClampToQuantum(getRedF4(composite)));
  setGreen(&outputPixel, ClampToQuantum(getGreenF4(composite)));
  setBlue(&outputPixel, ClampToQuantum(getBlueF4(composite)));
  setOpacity(&outputPixel, ClampToQuantum(getOpacityF4(composite)));
  image[index.y*imageWidth+index.x] = outputPixel;
}
Code: Select all
build options: -cl-single-precision-constant -cl-mad-enable -DCLQuantum=ushort -DCLSignedQuantum=short -DCLPixelType=ushort4 -DQuantumRange=65535.000000 -DQuantumScale=0.000015 -DCharQuantumScale=257.000000 -DMagickEpsilon=0.000000 -DMagickPI=3.141593 -DMaxMap=65535 -DMAGICKCORE_QUANTUM_DEPTH=16
input.cl:27:1501: warning: implicit declaration of function 'asin' is invalid in C99
input.cl:68:672: warning: implicit declaration of function 'async_work_group_strided_copy' is invalid in C99
input.cl:68:668: error: initializing 'event_t' with an expression of incompatible type 'int'
input.cl:68:799: warning: implicit declaration of function 'wait_group_events' is invalid in C99
input.cl:68:1055: warning: implicit declaration of function 'async_work_group_copy' is invalid in C99
input.cl:68:1051: error: initializing 'event_t' with an expression of incompatible type 'int'
input.cl:76:1650: error: initializing 'event_t' with an expression of incompatible type 'int'
input.cl:76:2077: error: initializing 'event_t' with an expression of incompatible type 'int'
input.cl:97:2314: warning: implicit declaration of function 'sinpi' is invalid in C99
input.cl:111:1414: error: initializing 'event_t' with an expression of incompatible type 'int'
input.cl:113:1406: error: initializing 'event_t' with an expression of incompatible type 'int'
input.cl:131:2475: warning: implicit declaration of function 'cospi' is invalid in C99