@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@******************************************************************************
@*
@*  @brief
@*  This file contains definitions of routines for spatial filter
@*
@*  @author
@*  Ittiam
@*
@*  @par List of Functions:
@*  - ideint_spatial_filter_a9()
@*
@*  @remarks
@*  None
@*
@*******************************************************************************


@******************************************************************************
@*
@*  @brief Performs spatial filtering
@*
@*  @par   Description
@*   This function performs edge adaptive spatial filtering on an 8x8 block
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_out
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  source stride
@*
@* @param[in] out_strd
@*  destination stride
@*
@* @returns
@*     None
@*
@* @remarks
@*
@******************************************************************************

        .global ideint_spatial_filter_a9

@------------------------------------------------------------------------------
@ ideint_spatial_filter_a9 - edge adaptive spatial interpolation (ARM + NEON)
@
@ In:    r0 = pu1_src   source pointer (row 0, column 0)
@        r1 = pu1_out   destination pointer
@        r2 = src_strd  source stride in bytes
@        r3 = out_strd  destination stride in bytes
@ Out:   none (4 rows of 8 bytes are written through r1)
@
@ Reads: bytes [-1 .. 8] of source rows 0..4, i.e. one byte to the left and
@        one byte to the right of each 8 byte row, over five rows.
@
@ Algorithm:
@   1. For four pairs of vertically adjacent rows (row_1, row_2) accumulate,
@      per column i, three absolute differences:
@          adiff[0] += |row_1[i]     - row_2[i]    |
@          adiff[1] += |row_1[i - 1] - row_2[i + 1]|
@          adiff[2] += |row_1[i + 1] - row_2[i - 1]|
@   2. Reduce the columns to two sums per direction: columns 0..3 (first
@      half) and columns 4..7 (second half), then scale adiff[0] by
@      EDGE_BIAS_0 and adiff[1], adiff[2] by EDGE_BIAS_1.
@   3. For each half choose shift:
@          if (adiff[2] <= adiff[1])  shift = (adiff[2] <= adiff[0]) ?  1 : 0
@          else                       shift = (adiff[1] <= adiff[0]) ? -1 : 0
@   4. For each of the four rows write, for each half,
@          out[i] = (row_1[i + shift] + row_2[i - shift] + 1) >> 1
@
@ Register roles:
@   r4        loop counter (edge detection), later upper-row ptr, first half
@   r5        scratch address / adiff[0], later lower-row ptr, first half
@   r6        adiff[1], later upper-row ptr, second half
@   r7        adiff[2], later lower-row ptr, second half
@   r8        shift for the first half, later filter loop counter
@   r9        shift for the second half
@   r10       copy of the original pu1_src
@   d0-d2     row_1[0], row_1[-1], row_1[+1]   (8 bytes each)
@   d3-d5     row_2[0], row_2[-1], row_2[+1]   (8 bytes each)
@   q8-q10    per-column accumulators for adiff[0], adiff[1], adiff[2]
@   d30, d31  EDGE_BIAS_0, EDGE_BIAS_1 replicated in 32-bit lanes
@
@ Clobbers: r0, r1, d0-d5, q8-q10, d30, d31, flags.
@           r4-r10 are saved and restored on the stack.
@------------------------------------------------------------------------------

        .equ    EDGE_BIAS_0,    5       @ weight of adiff[0]
        .equ    EDGE_BIAS_1,    7       @ weight of adiff[1] and adiff[2]
        .equ    NUM_ROWS,       4       @ row pairs analysed == rows written
        .equ    HALF_BLK_WD,    4       @ width in bytes of one half block

ideint_spatial_filter_a9:

    stmfd       sp!, {r4-r10, lr}

    @ Clear the per-column accumulators. Each 16-bit lane receives at most
    @ NUM_ROWS * 255 = 1020, so the lanes cannot overflow.
    vmov.i16    q8, #0                  @ adiff[0] columns
    vmov.i16    q9, #0                  @ adiff[1] columns
    vmov.i16    q10, #0                 @ adiff[2] columns

    @ Keep the original source pointer for the interpolation stage
    mov         r10, r0

    @ Prime row_1 with source row 0; r0 moves on to the next row
    sub         r5, r0, #1
    vld1.8      {d0}, [r0], r2          @ d0 = &pu1_row_1[0]
    vld1.8      {d1}, [r5]              @ d1 = &pu1_row_1[-1]
    add         r5, r5, #2
    vld1.8      {d2}, [r5]              @ d2 = &pu1_row_1[1]

    mov         r4, #NUM_ROWS

    vmov.i32    d30, #EDGE_BIAS_0
    vmov.i32    d31, #EDGE_BIAS_1

.Ldetect_edge:
    @ Load row_2 (the row below row_1); r0 moves on to the next row
    sub         r5, r0, #1
    vld1.8      {d3}, [r0], r2          @ d3 = &pu1_row_2[0]
    vld1.8      {d4}, [r5]              @ d4 = &pu1_row_2[-1]
    add         r5, r5, #2
    vld1.8      {d5}, [r5]              @ d5 = &pu1_row_2[1]

    @ Accumulate absolute differences, widened to 16 bits per column
    vabal.u8    q8, d0, d3              @ |pu1_row_1[i]     - pu1_row_2[i]|
    vabal.u8    q9, d1, d5              @ |pu1_row_1[i - 1] - pu1_row_2[i + 1]|
    vabal.u8    q10, d4, d2             @ |pu1_row_1[i + 1] - pu1_row_2[i - 1]|

    @ The row just loaded becomes row_1 of the next pair
    vmov        d0, d3
    vmov        d1, d4
    vmov        d2, d5

    subs        r4, r4, #1
    bgt         .Ldetect_edge

    @ Reduce the eight columns of each direction to two 32-bit sums:
    @ lane 0 = columns 0..3 (first half), lane 1 = columns 4..7 (second half)
    vpadd.i16   d16, d16, d17
    vpadd.i16   d18, d18, d19
    vpadd.i16   d20, d20, d21

    vpaddl.u16  d16, d16
    vpaddl.u16  d18, d18
    vpaddl.u16  d20, d20

    @ adiff[0] *= EDGE_BIAS_0
    vmul.i32    d16, d16, d30

    @ adiff[1] *= EDGE_BIAS_1
    vmul.i32    d18, d18, d31

    @ adiff[2] *= EDGE_BIAS_1
    vmul.i32    d20, d20, d31

    @ ---- Shift for the first half of the block (result in r8) ----
    @ The biased sums are small positive values, so the signed condition
    @ codes used below order them correctly.
    vmov.32     r5, d16[0]              @ adiff[0]
    vmov.32     r6, d18[0]              @ adiff[1]
    vmov.32     r7, d20[0]              @ adiff[2]

    mov         r8, #0

    @ adiff[2] <= adiff[1] ?
    cmp         r7, r6
    bgt         .Ldir_45_gt_135_1

    @ adiff[2] <= adiff[0]  ->  shift = 1
    cmp         r7, r5
    movle       r8, #1

    b           .Lcompute_shift_2

.Ldir_45_gt_135_1:
    @ adiff[1] <= adiff[0]  ->  shift = -1
    cmp         r6, r5
    mvnle       r8, #0

.Lcompute_shift_2:
    @ ---- Shift for the second half of the block (result in r9) ----
    vmov.32     r5, d16[1]              @ adiff[0]
    vmov.32     r6, d18[1]              @ adiff[1]
    vmov.32     r7, d20[1]              @ adiff[2]

    mov         r9, #0

    @ adiff[2] <= adiff[1] ?
    cmp         r7, r6
    bgt         .Ldir_45_gt_135_2

    @ adiff[2] <= adiff[0]  ->  shift = 1
    cmp         r7, r5
    movle       r9, #1

    b           .Linterpolate

.Ldir_45_gt_135_2:
    @ adiff[1] <= adiff[0]  ->  shift = -1
    cmp         r6, r5
    mvnle       r9, #0

.Linterpolate:
    @ First half:  r4 = pu1_src + shift,  r5 = pu1_src + src_strd - shift
    add         r4, r10, r8
    add         r5, r10, r2
    sub         r5, r5, r8

    @ Second half: same pointers, HALF_BLK_WD bytes to the right, using r9
    add         r10, r10, #HALF_BLK_WD
    add         r6, r10, r9
    add         r7, r10, r2
    sub         r7, r7, r9

    @ r8 is free again once r4/r5 are formed; reuse it as the row counter
    mov         r8, #NUM_ROWS

.Lfilter_loop:
    @ Lane 0 holds the first half, lane 1 the second half.
    @ d0 = upper row (shifted by +shift), d2 = lower row (shifted by -shift)
    vld1.32     {d0[0]}, [r4], r2
    vld1.32     {d2[0]}, [r5], r2

    vld1.32     {d0[1]}, [r6], r2
    vld1.32     {d2[1]}, [r7], r2

    @ Rounded average of the two rows: (a + b + 1) >> 1 per byte
    vrhadd.u8   d4, d0, d2
    vst1.32     {d4}, [r1], r3

    subs        r8, r8, #1
    bgt         .Lfilter_loop

    ldmfd       sp!, {r4-r10, pc}