1 #include "rs_core.rsh" 2 #include "rs_structs.h" 3 4 #include "rsCpuCoreRuntime.h" 5 6 extern float __attribute__((overloadable)) rsFrac(float v) { 7 int i = (int)floor(v); 8 return fmin(v - i, 0x1.fffffep-1f); 9 } 10 11 /* Function declarations from libRS */ 12 extern float4 __attribute__((overloadable)) convert_float4(uchar4 c); 13 14 /* Implementation of Core Runtime */ 15 16 extern float4 rsUnpackColor8888(uchar4 c) 17 { 18 return convert_float4(c) * 0.003921569f; 19 } 20 21 22 extern float __attribute__((overloadable)) rsClamp(float v, float l, float h) { 23 return clamp(v, l, h); 24 } 25 extern char __attribute__((overloadable)) rsClamp(char v, char l, char h) { 26 return clamp(v, l, h); 27 } 28 extern uchar __attribute__((overloadable)) rsClamp(uchar v, uchar l, uchar h) { 29 return clamp(v, l, h); 30 } 31 extern short __attribute__((overloadable)) rsClamp(short v, short l, short h) { 32 return clamp(v, l, h); 33 } 34 extern ushort __attribute__((overloadable)) rsClamp(ushort v, ushort l, ushort h) { 35 return clamp(v, l, h); 36 } 37 extern int __attribute__((overloadable)) rsClamp(int v, int l, int h) { 38 return clamp(v, l, h); 39 } 40 extern uint __attribute__((overloadable)) rsClamp(uint v, uint l, uint h) { 41 return clamp(v, l, h); 42 } 43 44 extern int32_t __attribute__((overloadable)) rsAtomicCas(volatile int32_t *ptr, int32_t expectedValue, int32_t newValue) { 45 return __sync_val_compare_and_swap(ptr, expectedValue, newValue); 46 } 47 48 extern uint32_t __attribute__((overloadable)) rsAtomicCas(volatile uint32_t *ptr, uint32_t expectedValue, uint32_t newValue) { 49 return __sync_val_compare_and_swap(ptr, expectedValue, newValue); 50 } 51 52 extern int32_t __attribute__((overloadable)) rsAtomicInc(volatile int32_t *ptr) { 53 return __sync_fetch_and_add(ptr, 1); 54 } 55 56 extern int32_t __attribute__((overloadable)) rsAtomicInc(volatile uint32_t *ptr) { 57 return __sync_fetch_and_add(ptr, 1); 58 } 59 60 extern int32_t __attribute__((overloadable)) rsAtomicDec(volatile int32_t *ptr) { 61 return __sync_fetch_and_sub(ptr, 1); 62 } 63 64 extern int32_t __attribute__((overloadable)) rsAtomicDec(volatile uint32_t *ptr) { 65 return __sync_fetch_and_sub(ptr, 1); 66 } 67 68 extern int32_t __attribute__((overloadable)) rsAtomicAdd(volatile int32_t *ptr, int32_t value) { 69 return __sync_fetch_and_add(ptr, value); 70 } 71 72 extern int32_t __attribute__((overloadable)) rsAtomicAdd(volatile uint32_t *ptr, uint32_t value) { 73 return __sync_fetch_and_add(ptr, value); 74 } 75 76 extern int32_t __attribute__((overloadable)) rsAtomicSub(volatile int32_t *ptr, int32_t value) { 77 return __sync_fetch_and_sub(ptr, value); 78 } 79 80 extern int32_t __attribute__((overloadable)) rsAtomicSub(volatile uint32_t *ptr, uint32_t value) { 81 return __sync_fetch_and_sub(ptr, value); 82 } 83 84 extern int32_t __attribute__((overloadable)) rsAtomicAnd(volatile int32_t *ptr, int32_t value) { 85 return __sync_fetch_and_and(ptr, value); 86 } 87 88 extern int32_t __attribute__((overloadable)) rsAtomicAnd(volatile uint32_t *ptr, uint32_t value) { 89 return __sync_fetch_and_and(ptr, value); 90 } 91 92 extern int32_t __attribute__((overloadable)) rsAtomicOr(volatile int32_t *ptr, int32_t value) { 93 return __sync_fetch_and_or(ptr, value); 94 } 95 96 extern int32_t __attribute__((overloadable)) rsAtomicOr(volatile uint32_t *ptr, uint32_t value) { 97 return __sync_fetch_and_or(ptr, value); 98 } 99 100 extern int32_t __attribute__((overloadable)) rsAtomicXor(volatile int32_t *ptr, int32_t value) { 101 return __sync_fetch_and_xor(ptr, value); 102 } 103 104 extern int32_t __attribute__((overloadable)) rsAtomicXor(volatile uint32_t *ptr, uint32_t value) { 105 return __sync_fetch_and_xor(ptr, value); 106 } 107 108 extern uint32_t __attribute__((overloadable)) min(uint32_t, uint32_t); 109 extern int32_t __attribute__((overloadable)) min(int32_t, int32_t); 110 extern uint32_t __attribute__((overloadable)) max(uint32_t, uint32_t); 111 extern int32_t __attribute__((overloadable)) max(int32_t, int32_t); 112 113 extern uint32_t __attribute__((overloadable)) rsAtomicMin(volatile uint32_t *ptr, uint32_t value) { 114 uint32_t prev, status; 115 do { 116 prev = *ptr; 117 uint32_t n = min(value, prev); 118 status = __sync_val_compare_and_swap(ptr, prev, n); 119 } while (status != prev); 120 return prev; 121 } 122 123 extern int32_t __attribute__((overloadable)) rsAtomicMin(volatile int32_t *ptr, int32_t value) { 124 int32_t prev, status; 125 do { 126 prev = *ptr; 127 int32_t n = min(value, prev); 128 status = __sync_val_compare_and_swap(ptr, prev, n); 129 } while (status != prev); 130 return prev; 131 } 132 133 extern uint32_t __attribute__((overloadable)) rsAtomicMax(volatile uint32_t *ptr, uint32_t value) { 134 uint32_t prev, status; 135 do { 136 prev = *ptr; 137 uint32_t n = max(value, prev); 138 status = __sync_val_compare_and_swap(ptr, prev, n); 139 } while (status != prev); 140 return prev; 141 } 142 143 extern int32_t __attribute__((overloadable)) rsAtomicMax(volatile int32_t *ptr, int32_t value) { 144 int32_t prev, status; 145 do { 146 prev = *ptr; 147 int32_t n = max(value, prev); 148 status = __sync_val_compare_and_swap(ptr, prev, n); 149 } while (status != prev); 150 return prev; 151 } 152 153 154 155 extern int32_t rand(); 156 #define RAND_MAX 0x7fffffff 157 158 159 160 extern float __attribute__((overloadable)) rsRand(float min, float max);/* { 161 float r = (float)rand(); 162 r /= RAND_MAX; 163 r = r * (max - min) + min; 164 return r; 165 } 166 */ 167 168 extern float __attribute__((overloadable)) rsRand(float max) { 169 return rsRand(0.f, max); 170 //float r = (float)rand(); 171 //r *= max; 172 //r /= RAND_MAX; 173 //return r; 174 } 175 176 extern int __attribute__((overloadable)) rsRand(int max) { 177 return (int)rsRand((float)max); 178 } 179 180 extern int __attribute__((overloadable)) rsRand(int min, int max) { 181 return (int)rsRand((float)min, (float)max); 182 } 183 184 extern uint32_t __attribute__((overloadable)) rsGetArray0(rs_kernel_context ctxt) { 185 return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[0]; 186 } 187 188 extern uint32_t __attribute__((overloadable)) rsGetArray1(rs_kernel_context ctxt) { 189 return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[1]; 190 } 191 192 extern uint32_t __attribute__((overloadable)) rsGetArray2(rs_kernel_context ctxt) { 193 return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[2]; 194 } 195 196 extern uint32_t __attribute__((overloadable)) rsGetArray3(rs_kernel_context ctxt) { 197 return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[3]; 198 } 199 200 extern rs_allocation_cubemap_face __attribute__((overloadable)) rsGetFace(rs_kernel_context ctxt) { 201 return (rs_allocation_cubemap_face)(((struct RsExpandKernelDriverInfo *)ctxt)->current.face); 202 } 203 204 extern uint32_t __attribute__((overloadable)) rsGetLod(rs_kernel_context ctxt) { 205 return ((struct RsExpandKernelDriverInfo *)ctxt)->current.lod; 206 } 207 208 extern uint32_t __attribute__((overloadable)) rsGetDimX(rs_kernel_context ctxt) { 209 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.x; 210 } 211 212 extern uint32_t __attribute__((overloadable)) rsGetDimY(rs_kernel_context ctxt) { 213 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.y; 214 } 215 216 extern uint32_t __attribute__((overloadable)) rsGetDimZ(rs_kernel_context ctxt) { 217 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.z; 218 } 219 220 extern uint32_t __attribute__((overloadable)) rsGetDimArray0(rs_kernel_context ctxt) { 221 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[0]; 222 } 223 224 extern uint32_t __attribute__((overloadable)) rsGetDimArray1(rs_kernel_context ctxt) { 225 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[1]; 226 } 227 228 extern uint32_t __attribute__((overloadable)) rsGetDimArray2(rs_kernel_context ctxt) { 229 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[2]; 230 } 231 232 extern uint32_t __attribute__((overloadable)) rsGetDimArray3(rs_kernel_context ctxt) { 233 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[3]; 234 } 235 236 extern bool __attribute__((overloadable)) rsGetDimHasFaces(rs_kernel_context ctxt) { 237 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.face != 0; 238 } 239 240 extern uint32_t __attribute__((overloadable)) rsGetDimLod(rs_kernel_context ctxt) { 241 return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.lod; 242 } 243 244 #define PRIM_DEBUG(T) \ 245 extern void __attribute__((overloadable)) rsDebug(const char *, const T *); \ 246 void __attribute__((overloadable)) rsDebug(const char *txt, T val) { \ 247 rsDebug(txt, &val); \ 248 } 249 250 PRIM_DEBUG(char2) 251 PRIM_DEBUG(char3) 252 PRIM_DEBUG(char4) 253 PRIM_DEBUG(uchar2) 254 PRIM_DEBUG(uchar3) 255 PRIM_DEBUG(uchar4) 256 PRIM_DEBUG(short2) 257 PRIM_DEBUG(short3) 258 PRIM_DEBUG(short4) 259 PRIM_DEBUG(ushort2) 260 PRIM_DEBUG(ushort3) 261 PRIM_DEBUG(ushort4) 262 PRIM_DEBUG(int2) 263 PRIM_DEBUG(int3) 264 PRIM_DEBUG(int4) 265 PRIM_DEBUG(uint2) 266 PRIM_DEBUG(uint3) 267 PRIM_DEBUG(uint4) 268 PRIM_DEBUG(long2) 269 PRIM_DEBUG(long3) 270 PRIM_DEBUG(long4) 271 PRIM_DEBUG(ulong2) 272 PRIM_DEBUG(ulong3) 273 PRIM_DEBUG(ulong4) 274 PRIM_DEBUG(float2) 275 PRIM_DEBUG(float3) 276 PRIM_DEBUG(float4) 277 PRIM_DEBUG(double2) 278 PRIM_DEBUG(double3) 279 PRIM_DEBUG(double4) 280 281 #undef PRIM_DEBUG 282 283 // Convert the half values to float before handing off to the driver. This 284 // eliminates the need in the driver to properly support the half datatype 285 // (either by adding compiler flags for half or link against compiler_rt). 286 // Also, pass the bit-equivalent ushort to be printed. 287 extern void __attribute__((overloadable)) rsDebug(const char *s, float f, 288 ushort us); 289 extern void __attribute__((overloadable)) rsDebug(const char *s, half h) { 290 rsDebug(s, (float) h, *(ushort *) &h); 291 } 292 293 extern void __attribute__((overloadable)) rsDebug(const char *s, 294 const float2 *f, 295 const ushort2 *us); 296 extern void __attribute__((overloadable)) rsDebug(const char *s, half2 h2) { 297 float2 f = convert_float2(h2); 298 rsDebug(s, &f, (ushort2 *) &h2); 299 } 300 301 extern void __attribute__((overloadable)) rsDebug(const char *s, 302 const float3 *f, 303 const ushort3 *us); 304 extern void __attribute__((overloadable)) rsDebug(const char *s, half3 h3) { 305 float3 f = convert_float3(h3); 306 rsDebug(s, &f, (ushort3 *) &h3); 307 } 308 309 extern void __attribute__((overloadable)) rsDebug(const char *s, 310 const float4 *f, 311 const ushort4 *us); 312 extern void __attribute__((overloadable)) rsDebug(const char *s, half4 h4) { 313 float4 f = convert_float4(h4); 314 rsDebug(s, &f, (ushort4 *) &h4); 315 } 316