1 /**************************************************************************** 2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * @file format_conversion.h 24 * 25 * @brief API implementation 26 * 27 ******************************************************************************/ 28 #include "format_types.h" 29 #include "format_traits.h" 30 31 ////////////////////////////////////////////////////////////////////////// 32 /// @brief Load SIMD packed pixels in SOA format and converts to 33 /// SOA RGBA32_FLOAT format. 34 /// @param pSrc - source data in SOA form 35 /// @param dst - output data in SOA form 36 template<SWR_FORMAT SrcFormat> 37 INLINE void LoadSOA(const uint8_t *pSrc, simdvector &dst) 38 { 39 // fast path for float32 40 if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32)) 41 { 42 auto lambda = [&](int comp) 43 { 44 simdscalar vComp = _simd_load_ps((const float*)(pSrc + comp*sizeof(simdscalar))); 45 46 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp; 47 }; 48 49 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda); 50 return; 51 } 52 53 auto lambda = [&](int comp) 54 { 55 // load SIMD components 56 simdscalar vComp = FormatTraits<SrcFormat>::loadSOA(comp, pSrc); 57 58 // unpack 59 vComp = FormatTraits<SrcFormat>::unpack(comp, vComp); 60 61 // convert 62 if (FormatTraits<SrcFormat>::isNormalized(comp)) 63 { 64 vComp = _simd_cvtepi32_ps(_simd_castps_si(vComp)); 65 vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<SrcFormat>::toFloat(comp))); 66 } 67 68 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp; 69 70 pSrc += (FormatTraits<SrcFormat>::GetBPC(comp) * KNOB_SIMD_WIDTH) / 8; 71 }; 72 73 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda); 74 } 75 76 ////////////////////////////////////////////////////////////////////////// 77 /// @brief Clamps the given component based on the requirements on the 78 /// Format template arg 79 /// @param vComp - SIMD vector of floats 80 /// @param Component - component 81 template<SWR_FORMAT Format> 82 INLINE simdscalar Clamp(simdscalar const &vC, uint32_t Component) 83 { 84 simdscalar vComp = vC; 85 if (FormatTraits<Format>::isNormalized(Component)) 86 { 87 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM) 88 { 89 vComp = _simd_max_ps(vComp, _simd_setzero_ps()); 90 } 91 92 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SNORM) 93 { 94 vComp = _simd_max_ps(vComp, _simd_set1_ps(-1.0f)); 95 } 96 vComp = _simd_min_ps(vComp, _simd_set1_ps(1.0f)); 97 } 98 else if (FormatTraits<Format>::GetBPC(Component) < 32) 99 { 100 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT) 101 { 102 int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1; 103 int iMin = 0; 104 simdscalari vCompi = _simd_castps_si(vComp); 105 vCompi = _simd_max_epu32(vCompi, _simd_set1_epi32(iMin)); 106 vCompi = _simd_min_epu32(vCompi, _simd_set1_epi32(iMax)); 107 vComp = _simd_castsi_ps(vCompi); 108 } 109 else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT) 110 { 111 int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1; 112 int iMin = -1 - iMax; 113 simdscalari vCompi = _simd_castps_si(vComp); 114 vCompi = _simd_max_epi32(vCompi, _simd_set1_epi32(iMin)); 115 vCompi = _simd_min_epi32(vCompi, _simd_set1_epi32(iMax)); 116 vComp = _simd_castsi_ps(vCompi); 117 } 118 } 119 120 return vComp; 121 } 122 123 ////////////////////////////////////////////////////////////////////////// 124 /// @brief Normalize the given component based on the requirements on the 125 /// Format template arg 126 /// @param vComp - SIMD vector of floats 127 /// @param Component - component 128 template<SWR_FORMAT Format> 129 INLINE simdscalar Normalize(simdscalar const &vC, uint32_t Component) 130 { 131 simdscalar vComp = vC; 132 if (FormatTraits<Format>::isNormalized(Component)) 133 { 134 vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<Format>::fromFloat(Component))); 135 vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp)); 136 } 137 return vComp; 138 } 139 140 ////////////////////////////////////////////////////////////////////////// 141 /// @brief Convert and store simdvector of pixels in SOA 142 /// RGBA32_FLOAT to SOA format 143 /// @param src - source data in SOA form 144 /// @param dst - output data in SOA form 145 template<SWR_FORMAT DstFormat> 146 INLINE void StoreSOA(const simdvector &src, uint8_t *pDst) 147 { 148 // fast path for float32 149 if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32)) 150 { 151 for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp) 152 { 153 simdscalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)]; 154 155 // Gamma-correct 156 if (FormatTraits<DstFormat>::isSRGB) 157 { 158 if (comp < 3) // Input format is always RGBA32_FLOAT. 159 { 160 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp); 161 } 162 } 163 164 _simd_store_ps((float*)(pDst + comp*sizeof(simdscalar)), vComp); 165 } 166 return; 167 } 168 169 auto lambda = [&](int comp) 170 { 171 simdscalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)]; 172 173 // Gamma-correct 174 if (FormatTraits<DstFormat>::isSRGB) 175 { 176 if (comp < 3) // Input format is always RGBA32_FLOAT. 177 { 178 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp); 179 } 180 } 181 182 // clamp 183 vComp = Clamp<DstFormat>(vComp, comp); 184 185 // normalize 186 vComp = Normalize<DstFormat>(vComp, comp); 187 188 // pack 189 vComp = FormatTraits<DstFormat>::pack(comp, vComp); 190 191 // store 192 FormatTraits<DstFormat>::storeSOA(comp, pDst, vComp); 193 194 pDst += (FormatTraits<DstFormat>::GetBPC(comp) * KNOB_SIMD_WIDTH) / 8; 195 }; 196 197 UnrollerL<0, FormatTraits<DstFormat>::numComps, 1>::step(lambda); 198 } 199 200 #if ENABLE_AVX512_SIMD16 201 ////////////////////////////////////////////////////////////////////////// 202 /// @brief Load SIMD packed pixels in SOA format and converts to 203 /// SOA RGBA32_FLOAT format. 204 /// @param pSrc - source data in SOA form 205 /// @param dst - output data in SOA form 206 template<SWR_FORMAT SrcFormat> 207 INLINE void SIMDCALL LoadSOA(const uint8_t *pSrc, simd16vector &dst) 208 { 209 // fast path for float32 210 if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32)) 211 { 212 auto lambda = [&](int comp) 213 { 214 simd16scalar vComp = _simd16_load_ps(reinterpret_cast<const float *>(pSrc + comp * sizeof(simd16scalar))); 215 216 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp; 217 }; 218 219 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda); 220 return; 221 } 222 223 auto lambda = [&](int comp) 224 { 225 // load SIMD components 226 simd16scalar vComp = FormatTraits<SrcFormat>::loadSOA_16(comp, pSrc); 227 228 // unpack 229 vComp = FormatTraits<SrcFormat>::unpack(comp, vComp); 230 231 // convert 232 if (FormatTraits<SrcFormat>::isNormalized(comp)) 233 { 234 vComp = _simd16_cvtepi32_ps(_simd16_castps_si(vComp)); 235 vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<SrcFormat>::toFloat(comp))); 236 } 237 238 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp; 239 240 pSrc += (FormatTraits<SrcFormat>::GetBPC(comp) * KNOB_SIMD16_WIDTH) / 8; 241 }; 242 243 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda); 244 } 245 246 ////////////////////////////////////////////////////////////////////////// 247 /// @brief Clamps the given component based on the requirements on the 248 /// Format template arg 249 /// @param vComp - SIMD vector of floats 250 /// @param Component - component 251 template<SWR_FORMAT Format> 252 INLINE simd16scalar SIMDCALL Clamp(simd16scalar const &v, uint32_t Component) 253 { 254 simd16scalar vComp = v; 255 if (FormatTraits<Format>::isNormalized(Component)) 256 { 257 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM) 258 { 259 vComp = _simd16_max_ps(vComp, _simd16_setzero_ps()); 260 } 261 262 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SNORM) 263 { 264 vComp = _simd16_max_ps(vComp, _simd16_set1_ps(-1.0f)); 265 } 266 vComp = _simd16_min_ps(vComp, _simd16_set1_ps(1.0f)); 267 } 268 else if (FormatTraits<Format>::GetBPC(Component) < 32) 269 { 270 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT) 271 { 272 int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1; 273 int iMin = 0; 274 simd16scalari vCompi = _simd16_castps_si(vComp); 275 vCompi = _simd16_max_epu32(vCompi, _simd16_set1_epi32(iMin)); 276 vCompi = _simd16_min_epu32(vCompi, _simd16_set1_epi32(iMax)); 277 vComp = _simd16_castsi_ps(vCompi); 278 } 279 else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT) 280 { 281 int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1; 282 int iMin = -1 - iMax; 283 simd16scalari vCompi = _simd16_castps_si(vComp); 284 vCompi = _simd16_max_epi32(vCompi, _simd16_set1_epi32(iMin)); 285 vCompi = _simd16_min_epi32(vCompi, _simd16_set1_epi32(iMax)); 286 vComp = _simd16_castsi_ps(vCompi); 287 } 288 } 289 290 return vComp; 291 } 292 293 ////////////////////////////////////////////////////////////////////////// 294 /// @brief Normalize the given component based on the requirements on the 295 /// Format template arg 296 /// @param vComp - SIMD vector of floats 297 /// @param Component - component 298 template<SWR_FORMAT Format> 299 INLINE simd16scalar SIMDCALL Normalize(simd16scalar const &vComp, uint32_t Component) 300 { 301 simd16scalar r = vComp; 302 if (FormatTraits<Format>::isNormalized(Component)) 303 { 304 r = _simd16_mul_ps(r, _simd16_set1_ps(FormatTraits<Format>::fromFloat(Component))); 305 r = _simd16_castsi_ps(_simd16_cvtps_epi32(r)); 306 } 307 return r; 308 } 309 310 ////////////////////////////////////////////////////////////////////////// 311 /// @brief Convert and store simdvector of pixels in SOA 312 /// RGBA32_FLOAT to SOA format 313 /// @param src - source data in SOA form 314 /// @param dst - output data in SOA form 315 template<SWR_FORMAT DstFormat> 316 INLINE void SIMDCALL StoreSOA(const simd16vector &src, uint8_t *pDst) 317 { 318 // fast path for float32 319 if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32)) 320 { 321 for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp) 322 { 323 simd16scalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)]; 324 325 // Gamma-correct 326 if (FormatTraits<DstFormat>::isSRGB) 327 { 328 if (comp < 3) // Input format is always RGBA32_FLOAT. 329 { 330 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp); 331 } 332 } 333 334 _simd16_store_ps(reinterpret_cast<float *>(pDst + comp * sizeof(simd16scalar)), vComp); 335 } 336 return; 337 } 338 339 auto lambda = [&](int comp) 340 { 341 simd16scalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)]; 342 343 // Gamma-correct 344 if (FormatTraits<DstFormat>::isSRGB) 345 { 346 if (comp < 3) // Input format is always RGBA32_FLOAT. 347 { 348 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp); 349 } 350 } 351 352 // clamp 353 vComp = Clamp<DstFormat>(vComp, comp); 354 355 // normalize 356 vComp = Normalize<DstFormat>(vComp, comp); 357 358 // pack 359 vComp = FormatTraits<DstFormat>::pack(comp, vComp); 360 361 // store 362 FormatTraits<DstFormat>::storeSOA(comp, pDst, vComp); 363 364 pDst += (FormatTraits<DstFormat>::GetBPC(comp) * KNOB_SIMD16_WIDTH) / 8; 365 }; 366 367 UnrollerL<0, FormatTraits<DstFormat>::numComps, 1>::step(lambda); 368 } 369 370 #endif 371