Home | History | Annotate | Download | only in x86
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2015 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21 *******************************************************************************
     22 * @file
     23 *  icv_sad.c
     24 *
     25 * @brief
     26 *  This file contains the functions to compute SAD
     27 *
     28 * @author
     29 *  Ittiam
     30 *
     31 * @par List of Functions:
     32 *  icv_sad_8x4_ssse3()
     33 *
     34 * @remarks
     35 *  None
     36 *
     37 *******************************************************************************
     38 */
     39 /*****************************************************************************/
     40 /* File Includes                                                             */
     41 /*****************************************************************************/
     42 /* System include files */
     43 #include <stdio.h>
     44 #include <stdint.h>
     45 #include <string.h>
     46 #include <stdlib.h>
     47 #include <assert.h>
     48 #include <immintrin.h>
     49 
     50 /* User include files */
     51 #include "icv_datatypes.h"
     52 #include "icv_macros.h"
     53 #include "icv_platform_macros.h"
     54 #include "icv.h"
     55 
     56 /**
     57 *******************************************************************************
     58 *
     59 * @brief
     60 *  Compute 8x4 SAD
     61 *
     62 * @par   Description
     63 *  Compute 8x4 sum of absolute differences between source and reference block
     64 *
     65 * @param[in] pu1_src
     66 *  Source buffer
     67 *
     68 * @param[in] pu1_ref
     69 *  Reference buffer
     70 *
     71 * @param[in] src_strd
     72 *  Source stride
     73 *
     74 * @param[in] ref_strd
     75 *  Reference stride
     76 *
     77 * @param[in] wd
     78 *  Assumed to be 8
     79 *
     80 * @param[in] ht
     81 *  Assumed to be 4
     82 
     83 * @returns
     84 *  SAD
     85 *
     86 * @remarks
     87 *
     88 *******************************************************************************
     89 */
     90 WORD32 icv_sad_8x4_ssse3(UWORD8 *pu1_src,
     91                          UWORD8 *pu1_ref,
     92                          WORD32 src_strd,
     93                          WORD32 ref_strd,
     94                          WORD32 wd,
     95                          WORD32 ht)
     96 {
     97     WORD32 sad;
     98     __m128 src_r0, src_r1;
     99     __m128 ref_r0, ref_r1;
    100     __m128i res_r0, res_r1;
    101 
    102     UNUSED(wd);
    103     UNUSED(ht);
    104     ASSERT(wd == 8);
    105     ASSERT(ht == 4);
    106 
    107     /* Load source */
    108     src_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src));
    109     pu1_src += src_strd;
    110 
    111     src_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src));
    112     pu1_src += src_strd;
    113 
    114     src_r0 = _mm_loadh_pi (src_r0, (__m64 *) (pu1_src));
    115     pu1_src += src_strd;
    116 
    117     src_r1 = _mm_loadh_pi (src_r1, (__m64 *) (pu1_src));
    118     pu1_src += src_strd;
    119 
    120 
    121     /* Load reference */
    122     ref_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref));
    123     pu1_ref += ref_strd;
    124 
    125     ref_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref));
    126     pu1_ref += ref_strd;
    127 
    128     ref_r0 = _mm_loadh_pi (ref_r0, (__m64 *) (pu1_ref));
    129     pu1_ref += ref_strd;
    130 
    131     ref_r1 = _mm_loadh_pi (ref_r1, (__m64 *) (pu1_ref));
    132     pu1_ref += ref_strd;
    133 
    134     /* Compute SAD for each row */
    135     res_r0 = _mm_sad_epu8((__m128i)src_r0, (__m128i)ref_r0);
    136     res_r1 = _mm_sad_epu8((__m128i)src_r1, (__m128i)ref_r1);
    137 
    138     /* Accumulate SAD */
    139     res_r0 = _mm_add_epi64(res_r0,  res_r1);
    140     res_r0 = _mm_add_epi64(res_r0, _mm_srli_si128(res_r0, 8));
    141 
    142     sad  = _mm_cvtsi128_si32(res_r0);
    143 
    144     return sad;
    145 }
    146