Home | History | Annotate | Download | only in x86
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2015 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21 *******************************************************************************
     22 * @file
     23 *  icv_variance_sse42.c
     24 *
     25 * @brief
     26 *  This file contains the functions to compute variance
     27 *
     28 * @author
     29 *  Ittiam
     30 *
     31 * @par List of Functions:
     32 *  icv_variance_8x4_ssse3()
     33 *
     34 * @remarks
     35 *  None
     36 *
     37 *******************************************************************************
     38 */
     39 /*****************************************************************************/
     40 /* File Includes                                                             */
     41 /*****************************************************************************/
     42 /* System include files */
     43 #include <stdio.h>
     44 #include <stdint.h>
     45 #include <string.h>
     46 #include <stdlib.h>
     47 #include <assert.h>
     48 #include <immintrin.h>
     49 
     50 /* User include files */
     51 #include "icv_datatypes.h"
     52 #include "icv_macros.h"
     53 #include "icv_platform_macros.h"
     54 #include "icv.h"
     55 
     56 /**
     57 *******************************************************************************
     58 *
     59 * @brief
     60 *  Computes variance of a given 8x4 block
     61 *
     62 * @par   Description
     63 *  Compute variance of a given 8x4 block
     64 *
     65 * @param[in] pu1_src
     66 *  Source
     67 *
     68 * @param[in] src_strd
     69 *  Source stride
     70 *
     71 * @param[in] wd
     72 *  Assumed to be 8
     73 *
     74 * @param[in] ht
     75 *  Assumed to be 4
     76 *
     77 * @returns
     78 *  Variance
     79 *
     80 * @remarks
     81 *
     82 *******************************************************************************
     83 */
     84 WORD32 icv_variance_8x4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 wd, WORD32 ht)
     85 {
     86     WORD32 sum;
     87     WORD32 sum_sqr;
     88     WORD32 blk_sz;
     89     WORD32 vrnc;
     90     __m128  src_r0, src_r1;
     91     __m128i ssrc_r0, ssrc_r1, ssrc_r2, ssrc_r3;
     92     __m128i sum_r0, sum_r1;
     93     __m128i sqr_r0, sqr_r1, sqr_r2, sqr_r3;
     94     __m128i vsum, vsum_sqr;
     95     __m128i zero;
     96     UNUSED(wd);
     97     UNUSED(ht);
     98 
     99     ASSERT(wd == 8);
    100     ASSERT(ht == 4);
    101 
    102     sum     = 0;
    103     sum_sqr = 0;
    104 
    105     blk_sz = 8 * 4;
    106 
    107     zero = _mm_setzero_si128();
    108 
    109     /* Load source */
    110     src_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src));
    111     pu1_src += src_strd;
    112 
    113     src_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src));
    114     pu1_src += src_strd;
    115 
    116     src_r0 = _mm_loadh_pi (src_r0, (__m64 *) (pu1_src));
    117     pu1_src += src_strd;
    118 
    119     src_r1 = _mm_loadh_pi (src_r1, (__m64 *) (pu1_src));
    120     pu1_src += src_strd;
    121 
    122     /* Compute sum of all elements */
    123     /* Use SAD with 0, since there is no pairwise addition */
    124     sum_r0  = _mm_sad_epu8((__m128i)src_r0, zero);
    125     sum_r1  = _mm_sad_epu8((__m128i)src_r1, zero);
    126 
    127     /* Accumulate SAD */
    128     vsum    = _mm_add_epi64(sum_r0, sum_r1);
    129     vsum    = _mm_add_epi64(vsum, _mm_srli_si128(vsum, 8));
    130 
    131     sum = _mm_cvtsi128_si32(vsum);
    132 
    133     /* Unpack to 16 bits */
    134     ssrc_r0 = _mm_unpacklo_epi8((__m128i)src_r0, zero);
    135     ssrc_r1 = _mm_unpacklo_epi8((__m128i)src_r1, zero);
    136     ssrc_r2 = _mm_unpackhi_epi8((__m128i)src_r0, zero);
    137     ssrc_r3 = _mm_unpackhi_epi8((__m128i)src_r1, zero);
    138 
    139     /* Compute sum of squares */
    140     sqr_r0 = _mm_madd_epi16(ssrc_r0,  ssrc_r0);
    141     sqr_r1 = _mm_madd_epi16(ssrc_r1,  ssrc_r1);
    142     sqr_r2 = _mm_madd_epi16(ssrc_r2,  ssrc_r2);
    143     sqr_r3 = _mm_madd_epi16(ssrc_r3,  ssrc_r3);
    144 
    145     vsum_sqr = _mm_add_epi32(sqr_r0,   sqr_r1);
    146     vsum_sqr = _mm_add_epi32(vsum_sqr, sqr_r2);
    147     vsum_sqr = _mm_add_epi32(vsum_sqr, sqr_r3);
    148 
    149     vsum_sqr = _mm_add_epi32(vsum_sqr, _mm_srli_si128(vsum_sqr, 8));
    150     vsum_sqr = _mm_add_epi32(vsum_sqr, _mm_srli_si128(vsum_sqr, 4));
    151     sum_sqr  = _mm_cvtsi128_si32(vsum_sqr);
    152 
    153     /* Compute variance */
    154     vrnc = ((sum_sqr * blk_sz) - (sum * sum)) / (blk_sz * blk_sz);
    155 
    156     return vrnc;
    157 }
    158 
    159