1 /****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /** 21 ******************************************************************************* 22 * @file 23 * icv_sad.c 24 * 25 * @brief 26 * This file contains the functions to compute SAD 27 * 28 * @author 29 * Ittiam 30 * 31 * @par List of Functions: 32 * icv_sad_8x4_ssse3() 33 * 34 * @remarks 35 * None 36 * 37 ******************************************************************************* 38 */ 39 /*****************************************************************************/ 40 /* File Includes */ 41 /*****************************************************************************/ 42 /* System include files */ 43 #include <stdio.h> 44 #include <stdint.h> 45 #include <string.h> 46 #include <stdlib.h> 47 #include <assert.h> 48 #include <immintrin.h> 49 50 /* User include files */ 51 #include "icv_datatypes.h" 52 #include "icv_macros.h" 53 #include "icv_platform_macros.h" 54 #include "icv.h" 55 56 /** 57 ******************************************************************************* 58 * 59 * @brief 60 * Compute 8x4 SAD 61 * 62 * @par Description 63 * Compute 8x4 sum of absolute differences between source and reference block 64 * 65 * @param[in] pu1_src 66 * Source buffer 67 * 68 * @param[in] pu1_ref 69 * Reference buffer 70 * 71 * @param[in] src_strd 72 * Source stride 73 * 74 * @param[in] ref_strd 75 * Reference stride 76 * 77 * @param[in] wd 78 * Assumed to be 8 79 * 80 * @param[in] ht 81 * Assumed to be 4 82 83 * @returns 84 * SAD 85 * 86 * @remarks 87 * 88 ******************************************************************************* 89 */ 90 WORD32 icv_sad_8x4_ssse3(UWORD8 *pu1_src, 91 UWORD8 *pu1_ref, 92 WORD32 src_strd, 93 WORD32 ref_strd, 94 WORD32 wd, 95 WORD32 ht) 96 { 97 WORD32 sad; 98 __m128 src_r0, src_r1; 99 __m128 ref_r0, ref_r1; 100 __m128i res_r0, res_r1; 101 102 UNUSED(wd); 103 UNUSED(ht); 104 ASSERT(wd == 8); 105 ASSERT(ht == 4); 106 107 /* Load source */ 108 src_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src)); 109 pu1_src += src_strd; 110 111 src_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src)); 112 pu1_src += src_strd; 113 114 src_r0 = _mm_loadh_pi (src_r0, (__m64 *) (pu1_src)); 115 pu1_src += src_strd; 116 117 src_r1 = _mm_loadh_pi (src_r1, (__m64 *) (pu1_src)); 118 pu1_src += src_strd; 119 120 121 /* Load reference */ 122 ref_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref)); 123 pu1_ref += ref_strd; 124 125 ref_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref)); 126 pu1_ref += ref_strd; 127 128 ref_r0 = _mm_loadh_pi (ref_r0, (__m64 *) (pu1_ref)); 129 pu1_ref += ref_strd; 130 131 ref_r1 = _mm_loadh_pi (ref_r1, (__m64 *) (pu1_ref)); 132 pu1_ref += ref_strd; 133 134 /* Compute SAD for each row */ 135 res_r0 = _mm_sad_epu8((__m128i)src_r0, (__m128i)ref_r0); 136 res_r1 = _mm_sad_epu8((__m128i)src_r1, (__m128i)ref_r1); 137 138 /* Accumulate SAD */ 139 res_r0 = _mm_add_epi64(res_r0, res_r1); 140 res_r0 = _mm_add_epi64(res_r0, _mm_srli_si128(res_r0, 8)); 141 142 sad = _mm_cvtsi128_si32(res_r0); 143 144 return sad; 145 } 146