Home | History | Annotate | Download | only in core
      1 /****************************************************************************
      2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 *
     23 * @file utils.h
     24 *
     25 * @brief Utilities used by SWR core.
     26 *
     27 ******************************************************************************/
     28 #pragma once
     29 
     30 #include <string.h>
     31 #include <type_traits>
     32 #include <algorithm>
     33 #include "common/os.h"
     34 #include "common/intrin.h"
     35 #include "common/swr_assert.h"
     36 #include "core/api.h"
     37 
//////////////////////////////////////////////////////////////////////////
/// Bounding box whose four extents are each stored as a simdscalari.
/// NOTE(review): presumably every SIMD lane carries the extents of a
/// separate primitive - confirm against the users of this struct.
//////////////////////////////////////////////////////////////////////////
struct simdBBox
{
    simdscalari ymin;
    simdscalari ymax;
    simdscalari xmin;
    simdscalari xmax;
};
     45 
#if ENABLE_AVX512_SIMD16
//////////////////////////////////////////////////////////////////////////
/// Bounding box whose four extents are each stored as a simd16scalari.
/// Only compiled when ENABLE_AVX512_SIMD16 is set; same member order as
/// simdBBox.
//////////////////////////////////////////////////////////////////////////
struct simd16BBox
{
    simd16scalari ymin;
    simd16scalari ymax;
    simd16scalari xmin;
    simd16scalari xmax;
};
#endif
     55 
//////////////////////////////////////////////////////////////////////////
/// Bounding box parameterized on a SIMD traits type.
/// Each extent is stored as SIMD_T::Integer, so SIMD_T must expose a
/// nested type named Integer.
//////////////////////////////////////////////////////////////////////////
template<typename SIMD_T>
struct SIMDBBOX_T
{
    typename SIMD_T::Integer            ymin;
    typename SIMD_T::Integer            ymax;
    typename SIMD_T::Integer            xmin;
    typename SIMD_T::Integer            xmax;
};
     64 
     65 // helper function to unroll loops
     66 template<int Begin, int End, int Step = 1>
     67 struct UnrollerL {
     68     template<typename Lambda>
     69     INLINE static void step(Lambda& func) {
     70         func(Begin);
     71         UnrollerL<Begin + Step, End, Step>::step(func);
     72     }
     73 };
     74 
     75 template<int End, int Step>
     76 struct UnrollerL<End, End, Step> {
     77     template<typename Lambda>
     78     static void step(Lambda& func) {
     79     }
     80 };
     81 
     82 // helper function to unroll loops, with mask to skip specific iterations
     83 template<int Begin, int End, int Step = 1, int Mask = 0x7f>
     84 struct UnrollerLMask {
     85     template<typename Lambda>
     86     INLINE static void step(Lambda& func) {
     87         if(Mask & (1 << Begin))
     88         {
     89             func(Begin);
     90         }
     91         UnrollerL<Begin + Step, End, Step>::step(func);
     92     }
     93 };
     94 
     95 template<int End, int Step, int Mask>
     96 struct UnrollerLMask<End, End, Step, Mask> {
     97     template<typename Lambda>
     98     static void step(Lambda& func) {
     99     }
    100 };
    101 
    102 // general CRC compute
    103 INLINE
    104 uint32_t ComputeCRC(uint32_t crc, const void *pData, uint32_t size)
    105 {
    106 #if defined(_WIN64) || defined(__x86_64__)
    107     uint32_t sizeInQwords = size / sizeof(uint64_t);
    108     uint32_t sizeRemainderBytes = size % sizeof(uint64_t);
    109     uint64_t* pDataWords = (uint64_t*)pData;
    110     for (uint32_t i = 0; i < sizeInQwords; ++i)
    111     {
    112         crc = (uint32_t)_mm_crc32_u64(crc, *pDataWords++);
    113     }
    114 #else
    115     uint32_t sizeInDwords = size / sizeof(uint32_t);
    116     uint32_t sizeRemainderBytes = size % sizeof(uint32_t);
    117     uint32_t* pDataWords = (uint32_t*)pData;
    118     for (uint32_t i = 0; i < sizeInDwords; ++i)
    119     {
    120         crc = _mm_crc32_u32(crc, *pDataWords++);
    121     }
    122 #endif
    123 
    124     uint8_t* pRemainderBytes = (uint8_t*)pDataWords;
    125     for (uint32_t i = 0; i < sizeRemainderBytes; ++i)
    126     {
    127         crc = _mm_crc32_u8(crc, *pRemainderBytes++);
    128     }
    129 
    130     return crc;
    131 }
    132 
    133 //////////////////////////////////////////////////////////////////////////
    134 /// Check specified bit within a data word
    135 //////////////////////////////////////////////////////////////////////////
    136 template <typename T>
    137 INLINE
    138 static bool CheckBit(T word, uint32_t bit)
    139 {
    140     return 0 != (word & (T(1) << bit));
    141 }
    142 
    143 //////////////////////////////////////////////////////////////////////////
    144 /// Add byte offset to any-type pointer
    145 //////////////////////////////////////////////////////////////////////////
    146 template <typename T>
    147 INLINE
    148 static T* PtrAdd(T* p, intptr_t offset)
    149 {
    150     intptr_t intp = reinterpret_cast<intptr_t>(p);
    151     return reinterpret_cast<T*>(intp + offset);
    152 }
    153 
    154 //////////////////////////////////////////////////////////////////////////
    155 /// Is a power-of-2?
    156 //////////////////////////////////////////////////////////////////////////
    157 template <typename T>
    158 INLINE
    159 static bool IsPow2(T value)
    160 {
    161     return value == (value & (T(0) - value));
    162 }
    163 
    164 //////////////////////////////////////////////////////////////////////////
    165 /// Align down to specified alignment
    166 /// Note: IsPow2(alignment) MUST be true
    167 //////////////////////////////////////////////////////////////////////////
    168 template <typename T1, typename T2>
    169 INLINE
    170 static T1 AlignDownPow2(T1 value, T2 alignment)
    171 {
    172     SWR_ASSERT(IsPow2(alignment));
    173     return value & ~T1(alignment - 1);
    174 }
    175 
    176 //////////////////////////////////////////////////////////////////////////
    177 /// Align up to specified alignment
    178 /// Note: IsPow2(alignment) MUST be true
    179 //////////////////////////////////////////////////////////////////////////
    180 template <typename T1, typename T2>
    181 INLINE
    182 static T1 AlignUpPow2(T1 value, T2 alignment)
    183 {
    184     return AlignDownPow2(value + T1(alignment - 1), alignment);
    185 }
    186 
    187 //////////////////////////////////////////////////////////////////////////
    188 /// Align up ptr to specified alignment
    189 /// Note: IsPow2(alignment) MUST be true
    190 //////////////////////////////////////////////////////////////////////////
    191 template <typename T1, typename T2>
    192 INLINE
    193 static T1* AlignUpPow2(T1* value, T2 alignment)
    194 {
    195     return reinterpret_cast<T1*>(
    196         AlignDownPow2(reinterpret_cast<uintptr_t>(value) + uintptr_t(alignment - 1), alignment));
    197 }
    198 
    199 //////////////////////////////////////////////////////////////////////////
    200 /// Align down to specified alignment
    201 //////////////////////////////////////////////////////////////////////////
    202 template <typename T1, typename T2>
    203 INLINE
    204 static T1 AlignDown(T1 value, T2 alignment)
    205 {
    206     if (IsPow2(alignment)) { return AlignDownPow2(value, alignment); }
    207     return value - T1(value % alignment);
    208 }
    209 
    210 //////////////////////////////////////////////////////////////////////////
    211 /// Align down to specified alignment
    212 //////////////////////////////////////////////////////////////////////////
    213 template <typename T1, typename T2>
    214 INLINE
    215 static T1* AlignDown(T1* value, T2 alignment)
    216 {
    217     return (T1*)AlignDown(uintptr_t(value), alignment);
    218 }
    219 
    220 //////////////////////////////////////////////////////////////////////////
    221 /// Align up to specified alignment
    222 /// Note: IsPow2(alignment) MUST be true
    223 //////////////////////////////////////////////////////////////////////////
    224 template <typename T1, typename T2>
    225 INLINE
    226 static T1 AlignUp(T1 value, T2 alignment)
    227 {
    228     return AlignDown(value + T1(alignment - 1), alignment);
    229 }
    230 
    231 //////////////////////////////////////////////////////////////////////////
    232 /// Align up to specified alignment
    233 /// Note: IsPow2(alignment) MUST be true
    234 //////////////////////////////////////////////////////////////////////////
    235 template <typename T1, typename T2>
    236 INLINE
    237 static T1* AlignUp(T1* value, T2 alignment)
    238 {
    239     return AlignDown(PtrAdd(value, alignment - 1), alignment);
    240 }
    241 
    242 //////////////////////////////////////////////////////////////////////////
    243 /// Helper structure used to access an array of elements that don't
    244 /// correspond to a typical word size.
    245 //////////////////////////////////////////////////////////////////////////
    246 template<typename T, size_t BitsPerElementT, size_t ArrayLenT>
    247 class BitsArray
    248 {
    249 private:
    250     static const size_t BITS_PER_WORD = sizeof(size_t) * 8;
    251     static const size_t ELEMENTS_PER_WORD = BITS_PER_WORD / BitsPerElementT;
    252     static const size_t NUM_WORDS = (ArrayLenT + ELEMENTS_PER_WORD - 1) / ELEMENTS_PER_WORD;
    253     static const size_t ELEMENT_MASK = (size_t(1) << BitsPerElementT) - 1;
    254 
    255     static_assert(ELEMENTS_PER_WORD * BitsPerElementT == BITS_PER_WORD,
    256         "Element size must an integral fraction of pointer size");
    257 
    258     size_t              m_words[NUM_WORDS] = {};
    259 
    260 public:
    261 
    262     T operator[] (size_t elementIndex) const
    263     {
    264         size_t word = m_words[elementIndex / ELEMENTS_PER_WORD];
    265         word >>= ((elementIndex % ELEMENTS_PER_WORD) * BitsPerElementT);
    266         return T(word & ELEMENT_MASK);
    267     }
    268 };
    269 
// Ranged integer argument for TemplateArgUnroller.
// TMin and TMax are carried in the type; val is the runtime value that
// TemplateArgUnroller compares against TMax, TMax - 1, ... down to TMin in
// order to pick the matching compile-time constant.
// Kept as a plain aggregate: TemplateArgUnroller builds it with brace
// initialization.
template <uint32_t TMin, uint32_t TMax>
struct IntArg
{
    uint32_t val;   // runtime value; expected to lie within [TMin, TMax]
};
    276 
    277 // Recursive template used to auto-nest conditionals.  Converts dynamic boolean function
    278 // arguments to static template arguments.
    279 template <typename TermT, typename... ArgsB>
    280 struct TemplateArgUnroller
    281 {
    282     //-----------------------------------------
    283     // Boolean value
    284     //-----------------------------------------
    285 
    286     // Last Arg Terminator
    287     static typename TermT::FuncType GetFunc(bool bArg)
    288     {
    289         if (bArg)
    290         {
    291             return TermT::template GetFunc<ArgsB..., std::true_type>();
    292         }
    293 
    294         return TermT::template GetFunc<ArgsB..., std::false_type>();
    295     }
    296 
    297     // Recursively parse args
    298     template <typename... TArgsT>
    299     static typename TermT::FuncType GetFunc(bool bArg, TArgsT... remainingArgs)
    300     {
    301         if (bArg)
    302         {
    303             return TemplateArgUnroller<TermT, ArgsB..., std::true_type>::GetFunc(remainingArgs...);
    304         }
    305 
    306         return TemplateArgUnroller<TermT, ArgsB..., std::false_type>::GetFunc(remainingArgs...);
    307     }
    308 
    309     //-----------------------------------------
    310     // Integer value (within specified range)
    311     //-----------------------------------------
    312 
    313     // Last Arg Terminator
    314     template <uint32_t TMin, uint32_t TMax>
    315     static typename TermT::FuncType GetFunc(IntArg<TMin, TMax> iArg)
    316     {
    317         if (iArg.val == TMax)
    318         {
    319             return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, TMax>>();
    320         }
    321         if (TMax > TMin)
    322         {
    323             return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(IntArg<TMin, TMax-1>{iArg.val});
    324         }
    325         SWR_ASSUME(false); return nullptr;
    326     }
    327     template <uint32_t TVal>
    328     static typename TermT::FuncType GetFunc(IntArg<TVal, TVal> iArg)
    329     {
    330         SWR_ASSERT(iArg.val == TVal);
    331         return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, TVal>>();
    332     }
    333 
    334     // Recursively parse args
    335     template <uint32_t TMin, uint32_t TMax, typename... TArgsT>
    336     static typename TermT::FuncType GetFunc(IntArg<TMin, TMax> iArg, TArgsT... remainingArgs)
    337     {
    338         if (iArg.val == TMax)
    339         {
    340             return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, TMax>>::GetFunc(remainingArgs...);
    341         }
    342         if (TMax > TMin)
    343         {
    344             return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(IntArg<TMin, TMax - 1>{iArg.val}, remainingArgs...);
    345         }
    346         SWR_ASSUME(false); return nullptr;
    347     }
    348     template <uint32_t TVal, typename... TArgsT>
    349     static typename TermT::FuncType GetFunc(IntArg<TVal, TVal> iArg, TArgsT... remainingArgs)
    350     {
    351         SWR_ASSERT(iArg.val == TVal);
    352         return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, TVal>>::GetFunc(remainingArgs...);
    353     }
    354 };
    355 
    356 //////////////////////////////////////////////////////////////////////////
    357 /// Helpers used to get / set environment variable
    358 //////////////////////////////////////////////////////////////////////////
    359 static INLINE std::string GetEnv(const std::string& variableName)
    360 {
    361     std::string output;
    362 #if defined(_WIN32)
    363     DWORD valueSize = GetEnvironmentVariableA(variableName.c_str(), nullptr, 0);
    364     if (!valueSize) return output;
    365     output.resize(valueSize - 1); // valueSize includes null, output.resize() does not
    366     GetEnvironmentVariableA(variableName.c_str(), &output[0], valueSize);
    367 #else
    368     char *env = getenv(variableName.c_str());
    369     output = env ? env : "";
    370 #endif
    371 
    372     return output;
    373 }
    374 
    375 static INLINE void SetEnv(const std::string& variableName, const std::string& value)
    376 {
    377 #if defined(_WIN32)
    378     SetEnvironmentVariableA(variableName.c_str(), value.c_str());
    379 #else
    380     setenv(variableName.c_str(), value.c_str(), true);
    381 #endif
    382 }
    383 
    384