@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/


.text
.p2align 2
    .global ixheaacd_covariance_matrix_calc_2_armv7

@ -----------------------------------------------------------------------------
@ ixheaacd_covariance_matrix_calc_2_armv7
@
@ Syntax : GNU assembler, 32-bit ARM, uses SMULWT / SMLAWT.
@
@ Register arguments, as used by the code below
@ (NOTE(review): presumably the AAPCS mapping of a C call
@  f(out, in, num_bands, slots) - confirm against the C prototype):
@   r0 = output pointer; advanced by 9 words per band, words 0..7 are written
@   r1 = input pointer to slot 0 of the first band; advanced by 1 word per band
@   r2 = band count. The band body runs once before r2 is tested: need r2 >= 1
@   r3 = slot count. (r3 - 2) is shifted as an unsigned value:   need r3 >= 2
@
@ Input layout (per band), in bytes relative to r1:
@   A[n] = word at n*512          B[n] = word at n*512 + 256
@   Slots n = -2 and n = -1 are read as history, n = 0 .. r3-1 as new data.
@   Every loaded word is arithmetically shifted right by 3 before use.
@   The sums below have the form of x[n]*conj(x[n-k]) with x = A + jB, so the
@   comments call A "re" and B "im".
@
@ Arithmetic: a*b below means SMULWT(a, b) = (a * (b >> 16)) >> 16, i.e. the
@ full 32-bit first operand times the top halfword of the second operand.
@
@ Output words written for each band (S = r3):
@   out[0] = sum m=-1..S-2  re[m]*re[m]   + im[m]*im[m]
@   out[1] = sum m=-2..S-3  re[m]*re[m]   + im[m]*im[m]
@   out[2] = sum n= 0..S-1  re[n]*re[n-1] + im[n]*im[n-1]
@   out[3] = sum n= 0..S-1  re[n]*re[n-2] + im[n]*im[n-2]
@   out[4] = sum n=-1..S-2  re[n]*re[n-1] + im[n]*im[n-1]
@   out[5] = sum n= 0..S-1  im[n]*re[n-1] - re[n]*im[n-1]
@   out[6] = sum n= 0..S-1  im[n]*re[n-2] - re[n]*im[n-2]
@   out[7] = sum n=-1..S-2  im[n]*re[n-1] - re[n]*im[n-1]
@   out[8] is not touched.
@
@ Saves/restores r4-r12 and returns by popping the saved lr into pc.
@ Uses up to 12 additional bytes of stack for temporaries; makes no calls.
@ -----------------------------------------------------------------------------
ixheaacd_covariance_matrix_calc_2_armv7:
    STMFD           sp!, {r4-r12, r14}

AUTO_CORR_LOOP:                             @ one pass per band

    STR             r0 , [sp, #-4]!         @ r0 is needed as scratch below

    @ ---- lag-0 / lag-1 sums: seed with step n = 0 ---------------------------
    LDR             r4 , [r1, #-4*128]      @ re[-1]
    LDR             r5 , [r1, #4*(64-128)]  @ im[-1]
    LDR             r6 , [r1]               @ re[0]
    LDR             r7 , [r1, #4*64]        @ im[0]

    MOV             r4, r4, ASR #3
    MOV             r5, r5, ASR #3
    MOV             r6, r6, ASR #3
    MOV             r7, r7, ASR #3

    SMULWT          r8 , r6 , r4            @ r8  = re[0]*re[-1]
    SMULWT          r9 , r7 , r4            @ r9  = im[0]*re[-1]
    SMULWT          r10, r6 , r5            @ r10 = re[0]*im[-1]
    SMLAWT          r8 , r7 , r5, r8        @ r8 += im[0]*im[-1]
    SMULWT          r11, r4 , r4            @ r11 = re[-1]^2
    SUB             r9 , r9 , r10           @ r9 -= re[0]*im[-1]
    SMLAWT          r11, r5 , r5, r11       @ r11 += im[-1]^2

    @ r8  = lag-1 "real" sum, r9 = lag-1 "imag" sum, r11 = energy sum
    MOV             r10, r1                 @ r10 walks re[], pre-incremented
    ADD             r12, r1, #64*4          @ r12 walks im[], pre-incremented
    MOV             r4 , r6                 @ previous sample = slot 0
    MOV             r5 , r7

    @ Steps n = 1 .. r3-2 are done two at a time, plus one odd step.
    SUB             r14, r3 , #2
    MOVS            r14, r14, LSR #1        @ r14 = number of step pairs
    BEQ             ODD_CHECK               @ no pairs: the odd step may still
                                            @ be due (r3 == 3), so do not skip
                                            @ the parity test
LOOP2:
    LDR             r6 , [r10, #4*128]!     @ current = next slot
    LDR             r7 , [r12, #4*128]!

    MOV             r6, r6, ASR #3
    MOV             r7, r7, ASR #3

    SMLAWT          r8 , r6 , r4, r8        @ current (r6,r7) x previous (r4,r5)
    SMLAWT          r9 , r7 , r4, r9
    SMLAWT          r8 , r7 , r5, r8
    SMULWT          r0 , r6 , r5
    SMLAWT          r11, r4 , r4, r11       @ energy of previous sample
    SUB             r9 , r9 , r0
    SMLAWT          r11, r5 , r5, r11

    LDR             r4 , [r10, #4*128]!     @ roles swap: current = (r4,r5),
    LDR             r5 , [r12, #4*128]!     @ previous = (r6,r7)

    MOV             r4, r4, ASR #3
    MOV             r5, r5, ASR #3

    SUBS            r14, r14, #1            @ flags consumed by BNE below; the
                                            @ multiplies/SUB do not alter NZCV

    SMLAWT          r8 , r4 , r6, r8
    SMLAWT          r9 , r5 , r6, r9
    SMLAWT          r8 , r5 , r7, r8
    SMULWT          r0 , r4 , r7
    SMLAWT          r11, r6 , r6, r11
    SUB             r9 , r9 , r0
    SMLAWT          r11, r7 , r7, r11

    BNE             LOOP2

ODD_CHECK:
    ANDS            r0, r3, #0x01           @ odd r3 => (r3 - 2) is odd too
    BEQ             ENDLOOP2
ODDLOOP:                                    @ single leftover step

    LDR             r6 , [r10, #4*128]!
    LDR             r7 , [r12, #4*128]!

    MOV             r6, r6, ASR #3
    MOV             r7, r7, ASR #3

    SMLAWT          r8 , r6 , r4, r8
    SMLAWT          r9 , r7 , r4, r9
    SMLAWT          r8 , r7 , r5, r8
    SMULWT          r0 , r6 , r5
    SMLAWT          r11, r4 , r4, r11
    SUB             r9 , r9 , r0
    SMLAWT          r11, r5 , r5, r11

ENDLOOP2:

    @ ---- out[1] = r11 + |x[-2]|^2 ,  out[0] = r11 + |x[r3-2]|^2 -------------
    MOV             r12, r11
    LDR             r6 , [r1, #-8*128]      @ re[-2]
    LDR             r7 , [r1, #4*64-8*128]  @ im[-2]

    MOV             r6, r6, ASR #3
    MOV             r7, r7, ASR #3

    SMLAWT          r12, r6 , r6, r12

    SUB             r10, r3, #2             @ r10 = r3 - 2 (slot index)
    SMLAWT          r12, r7 , r7, r12

    MOV             r0, r10, LSL #(2+7)     @ byte offset of slot r3-2 ...
    ADD             r0, r0, #0x100          @ ... plus 256 for the im word
    LDR             r4 , [r1, r10, LSL #(2+7)]  @ re[r3-2]
    LDR             r5 , [r1, r0]               @ im[r3-2]

    MOV             r4, r4, ASR #3
    MOV             r5, r5, ASR #3

    SMLAWT          r11, r4, r4, r11
    LDR             r0 , [sp], #4           @ restore output pointer
    SMLAWT          r11, r5, r5, r11

    STR             r12, [r0, #4]           @ out[1]
    STR             r11, [r0]               @ out[0]

    @ ---- out[4] = r8 + step n = -1 ,  out[2] = r8 + step n = r3-1 -----------
    MOV             r11, r8
    LDR             r12, [r1, #-4*128]      @ re[-1]
    LDR             r14, [r1, #4*(64-128)]  @ im[-1]

    MOV             r12, r12, ASR #3
    MOV             r14, r14, ASR #3

    SMLAWT          r11, r12, r6, r11       @ += re[-1]*re[-2]
    ADD             r10, r10, #1            @ r10 = r3 - 1

    LDR             r12, [r1, r10, LSL#(2+7)]   @ re[r3-1]
    SMLAWT          r11, r14, r7, r11       @ += im[-1]*im[-2]

    MOV             r14, r10, LSL #(2+7)
    ADD             r14, r14, #0x100

    MOV             r12, r12, ASR #3

    LDR             r14, [r1, r14]          @ im[r3-1]

    SMLAWT          r8 , r12, r4, r8        @ += re[r3-1]*re[r3-2]

    MOV             r14, r14, ASR #3
    MOV             r10, r9                 @ copy lag-1 imag sum before r9 is
                                            @ extended for out[5]

    SMLAWT          r8 , r14, r5, r8        @ += im[r3-1]*im[r3-2]
    STR             r11, [r0, #16]          @ out[4]
    STR             r8 , [r0, #8]           @ out[2]

    @ ---- out[5] = r9 + step n = r3-1 ,  out[7] = r9 + step n = -1 -----------
    SMLAWT          r9 , r14, r4 , r9       @ += im[r3-1]*re[r3-2]
    SMULWT          r8 , r12, r5            @    re[r3-1]*im[r3-2]
    LDR             r14, [r1, #4*(64-128)]  @ im[-1]
    SUB             r9 , r9 , r8

    MOV             r14, r14, ASR #3
    LDR             r12, [r1, #-4*128]      @ re[-1]
    SMLAWT          r10, r14, r6 , r10      @ += im[-1]*re[-2]

    MOV             r12, r12, ASR #3
    SMULWT          r8 , r12, r7            @    re[-1]*im[-2]
    STR             r9 , [r0, #20]          @ out[5]
    SUB             r10, r10, r8
    STR             r10, [r0, #28]          @ out[7]

    @ ---- lag-2 sums over n = 0 .. r3-1 (out[3], out[6]) ---------------------
    STR             r1 , [sp, #-4]!         @ r1 is post-incremented below

    STMFD           sp!, {r0, r3}           @ r0 -> loop count, r3 -> imag sum
    MOVS            r0 , r3 , LSR #2        @ r0 = number of 4-step groups;
                                            @ Z is consumed by BEQ below (the
                                            @ MOV/LDR in between keep flags)

    MOV             r12, #0                 @ r12 = lag-2 "real" sum
    MOV             r3 , #0                 @ r3  = lag-2 "imag" sum
    LDR             r5 , [r1, #-8*128]      @ re[-2]
    LDR             r7 , [r1, #-4*128]      @ re[-1]
    LDR             r9 , [r1, #4*(64-256)]  @ im[-2]
    LDR             r11, [r1, #4*(64-128)]  @ im[-1]

    MOV             r5, r5, ASR #3
    MOV             r7, r7, ASR #3
    MOV             r9, r9, ASR #3
    MOV             r11, r11, ASR #3

    BEQ             ENDLOOP3
LOOP3:
    @ step 1: (r4,r8) = x[n]   against (r5,r9)  = x[n-2]
    LDR             r4 , [r1], #4*128
    LDR             r8 , [r1, #4*(64-128)]  @ im of the slot just loaded (r1
                                            @ already points one slot ahead)

    MOV             r4, r4, ASR #3
    MOV             r8, r8, ASR #3

    SMLAWT          r12, r4 , r5 , r12
    SMLAWT          r12, r8 , r9 , r12
    SMULWT          r14, r4 , r9
    SMLAWT          r3 , r8 , r5 , r3

    LDR             r6 , [r1], #4*128
    SUB             r3 , r3 , r14

    @ step 2: (r6,r10) = x[n+1] against (r7,r11) = x[n-1]
    LDR             r10, [r1, #4*(64-128)]

    MOV             r6, r6, ASR #3
    MOV             r10, r10, ASR #3

    SMLAWT          r12, r6 , r7 , r12
    SMLAWT          r12, r10, r11, r12
    SMULWT          r14, r6 , r11
    SMLAWT          r3 , r10, r7 , r3

    LDR             r5 , [r1], #4*128
    SUB             r3 , r3 , r14

    @ step 3: (r5,r9) = x[n+2] against (r4,r8) = x[n]
    LDR             r9 , [r1, #4*(64-128)]

    MOV             r5, r5, ASR #3
    MOV             r9, r9, ASR #3

    SMLAWT          r12, r5 , r4 , r12
    SMLAWT          r12, r9 , r8 , r12
    SMULWT          r14, r5 , r8
    SMLAWT          r3 , r9 , r4 , r3

    LDR             r7 , [r1], #4*128
    SUB             r3 , r3 , r14

    @ step 4: (r7,r11) = x[n+3] against (r6,r10) = x[n+1]
    LDR             r11, [r1, #4*(64-128)]

    MOV             r7, r7, ASR #3
    MOV             r11, r11, ASR #3

    SMLAWT          r12, r7 , r6 , r12
    SMLAWT          r12, r11, r10, r12
    SMULWT          r14, r7 , r10
    SMLAWT          r3 , r11, r6 , r3

    SUBS            r0 , r0 , #1
    SUB             r3 , r3 , r14

    BNE             LOOP3
ENDLOOP3:
    MOV             r4 , r3                 @ keep imag sum; r3 is restored next
    LDMFD           sp!, {r0, r3}

    ANDS            r5 , r3 , #3            @ r5 = leftover steps (0..3)
    BEQ             ENDLOOP4

LOOP4:                                      @ one step per pass, x[n-2] reloaded
    LDR             r6 , [r1, #-8*128]      @ re[n-2]
    LDR             r10, [r1, #4*(64-256)]  @ im[n-2]

    LDR             r7 , [r1], #4*128       @ re[n], then advance one slot
    LDR             r11, [r1, #4*(64-128)]  @ im[n]

    MOV             r6, r6, ASR #3
    MOV             r7, r7, ASR #3
    MOV             r10, r10, ASR #3
    MOV             r11, r11, ASR #3

    SMLAWT          r12, r7 , r6 , r12
    SMLAWT          r12, r11, r10, r12
    SMULWT          r14, r7 , r10
    SMLAWT          r4 , r11, r6 , r4

    SUBS            r5 , r5 , #1            @ Z is always clear on arrival here,
                                            @ so no condition suffix is needed

    SUB             r4 , r4 , r14

    BNE             LOOP4
ENDLOOP4:
    STR             r12, [r0, #12]          @ out[3]
    STR             r4 , [r0, #24]          @ out[6]
    LDR             r1 , [sp], #4           @ back to slot 0 of this band

    SUBS            R2, R2, #1              @ flags consumed by BGT below

    ADD             r0, r0, #4*9            @ next output record (9 words)

    ADD             r1, r1, #4              @ next band (1 word further)
    BGT             AUTO_CORR_LOOP

END_OF_AUT0:

    LDMFD           sp!, {r4-r12, r15}      @ restore and return (pc <- lr)
