Home | History | Annotate | Download | only in core
      1 /****************************************************************************
      2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 *
     23 * @file fifo.hpp
     24 *
     25 * @brief Definitions for our fifos used for thread communication.
     26 *
     27 ******************************************************************************/
     28 #pragma once
     29 
     30 
     31 #include "common/os.h"
     32 #include "arena.h"
     33 
     34 #include <vector>
     35 #include <cassert>
     36 
     37 template<class T>
     38 struct QUEUE
     39 {
     40     OSALIGNLINE(volatile uint32_t) mLock{ 0 };
     41     OSALIGNLINE(volatile uint32_t) mNumEntries{ 0 };
     42     std::vector<T*> mBlocks;
     43     T* mCurBlock{ nullptr };
     44     uint32_t mHead{ 0 };
     45     uint32_t mTail{ 0 };
     46     uint32_t mCurBlockIdx{ 0 };
     47 
     48     // power of 2
     49     static const uint32_t mBlockSizeShift = 6;
     50     static const uint32_t mBlockSize = 1 << mBlockSizeShift;
     51 
     52     template <typename ArenaT>
     53     void clear(ArenaT& arena)
     54     {
     55         mHead = 0;
     56         mTail = 0;
     57         mBlocks.clear();
     58         T* pNewBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
     59         mBlocks.push_back(pNewBlock);
     60         mCurBlock = pNewBlock;
     61         mCurBlockIdx = 0;
     62         mNumEntries = 0;
     63         mLock = 0;
     64     }
     65 
     66     uint32_t getNumQueued()
     67     {
     68         return mNumEntries;
     69     }
     70 
     71     bool tryLock()
     72     {
     73         if (mLock)
     74         {
     75             return false;
     76         }
     77 
     78         // try to lock the FIFO
     79         LONG initial = InterlockedCompareExchange(&mLock, 1, 0);
     80         return (initial == 0);
     81     }
     82 
     83     void unlock()
     84     {
     85         mLock = 0;
     86     }
     87 
     88     T* peek()
     89     {
     90         if (mNumEntries == 0)
     91         {
     92             return nullptr;
     93         }
     94         uint32_t block = mHead >> mBlockSizeShift;
     95         return &mBlocks[block][mHead & (mBlockSize-1)];
     96     }
     97 
     98     void dequeue_noinc()
     99     {
    100         mHead ++;
    101         mNumEntries --;
    102     }
    103 
    104     template <typename ArenaT>
    105     bool enqueue_try_nosync(ArenaT& arena, const T* entry)
    106     {
    107         const float* pSrc = (const float*)entry;
    108         float* pDst = (float*)&mCurBlock[mTail];
    109 
    110         auto lambda = [&](int32_t i)
    111         {
    112             __m256 vSrc = _simd_load_ps(pSrc + i*KNOB_SIMD_WIDTH);
    113             _simd_stream_ps(pDst + i*KNOB_SIMD_WIDTH, vSrc);
    114         };
    115 
    116         const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH*4);
    117         static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T),
    118             "FIFO element size should be multiple of SIMD width.");
    119 
    120         UnrollerL<0, numSimdLines, 1>::step(lambda);
    121 
    122         mTail ++;
    123         if (mTail == mBlockSize)
    124         {
    125             if (++mCurBlockIdx < mBlocks.size())
    126             {
    127                 mCurBlock = mBlocks[mCurBlockIdx];
    128             }
    129             else
    130             {
    131                 T* newBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
    132                 SWR_ASSERT(newBlock);
    133 
    134                 mBlocks.push_back(newBlock);
    135                 mCurBlock = newBlock;
    136             }
    137 
    138             mTail = 0;
    139         }
    140 
    141         mNumEntries ++;
    142         return true;
    143     }
    144 
    145     void destroy()
    146     {
    147     }
    148 
    149 };
    150