Home | History | Annotate | Download | only in armv8
      // push_v_regs
      //   Spills x8-x15, x17, x20, x21, x26-x28 and q0-q3 to the stack.
      //   The 176-byte frame is reserved in one step and then filled with
      //   offset stores. The layout and the final sp are the same as a
      //   chain of pre-decrement pushes in the order
      //   (x8,x9) (x10,x11) (x12,x13) (x14,x15) (x20,x21) (x26,x17)
      //   (x27,x28) (q2,q3) (q0,q1), so pop_v_regs unwinds it unchanged.
      //   176 is a multiple of 16, so sp keeps its 16-byte alignment.
      .macro push_v_regs
          sub             sp, sp, #176
          stp             x8, x9, [sp, #160]
          stp             x10, x11, [sp, #144]
          stp             x12, x13, [sp, #128]
          stp             x14, x15, [sp, #112]
          stp             x20, x21, [sp, #96]
          stp             x26, x17, [sp, #80]
          stp             x27, x28, [sp, #64]
          stp             q2, q3, [sp, #32]
          stp             q0, q1, [sp]
      .endm
     // pop_v_regs
     //   Reloads q0-q3, x27, x28, x26, x17, x20, x21 and x8-x15 from the
     //   176-byte frame written by push_v_regs, then releases the frame.
     //   All loads are issued before sp is raised so that no live data
     //   ever sits below the stack pointer.
     .macro pop_v_regs
         ldp             q0, q1, [sp]
         ldp             q2, q3, [sp, #32]
         ldp             x27, x28, [sp, #64]
         ldp             x26, x17, [sp, #80]
         ldp             x20, x21, [sp, #96]
         ldp             x14, x15, [sp, #112]
         ldp             x12, x13, [sp, #128]
         ldp             x10, x11, [sp, #144]
         ldp             x8, x9, [sp, #160]
         add             sp, sp, #176
     .endm
     23 
     //----------------------------------------------------------------------
     // ixheaacd_shiftrountine_with_rnd_eld
     //
     // Inputs, as consumed by the code below:
     //   x0 = qmfReal : array of 32-bit words
     //   x1 = qmfImag : array of 32-bit words
     //   x2 = out     : array of 16-bit halfwords
     //   w3 = len     : signed 32-bit count; nothing is done when len < 1
     //   w4 is overwritten before it is ever read, so any value passed
     //   there is ignored; the shift amount is the constant 9.
     //
     // A second output pointer rev = out + len (halfwords) is formed on
     // entry. With j starting at len-1 and dropping by 2 while j >= 0,
     // each iteration does:
     //   r1 = *qmfReal   i1 = *qmfImag     (front element)
     //   r2 = qmfReal[j] i2 = qmfImag[j]   (indexed from the current front)
     //   out[0] = rnd(r1 - i1)       out[j] = rnd(r2 - i2)
     //   rev[0] = rnd(-(i2 + r2))    rev[j] = rnd(-(i1 + r1))
     //   qmfReal++, qmfImag++, out++, rev++
     // where rnd(v) = bits 31:16 of sat(sat(v << 9) + 0x8000). The adds,
     // subtracts and negations before the shift are plain wrapping 32-bit
     // operations.
     //
     // Registers: x0-x7 and the flags are modified. x9, x10, x12, x14,
     // x17 and v0-v3 are also written, but push_v_regs / pop_v_regs save
     // and restore them.
     //----------------------------------------------------------------------
     .text
     .p2align 2
         .global ixheaacd_shiftrountine_with_rnd_eld
     ixheaacd_shiftrountine_with_rnd_eld:
         push_v_regs

         // len is a 32-bit value: only w3 is defined on entry, so it is
         // sign-extended here instead of trusting the upper half of x3.
         add             x12, x2, w3, sxtw #1    // rev = out + len halfwords
         mov             w9, #0x00008000
         dup             v0.4s, w9               // rounding constant per lane
         movi            v3.4s, #9               // left-shift count per lane
         subs            w3, w3, #1              // j = len - 1; clears x3[63:32]
         b.lt            S_WITH_R_L6             // len < 1 (overflow-safe)

     S_WITH_R_L5:
         ldr             w5, [x1, x3, lsl #2]    // i2 = qmfImag[j]
         ldr             w7, [x0, x3, lsl #2]    // r2 = qmfReal[j]
         ldr             w14, [x0], #4           // r1 = *qmfReal++
         ldr             w10, [x1], #4           // i1 = *qmfImag++

         add             w6, w5, w7
         neg             w6, w6                  // w6 = -(i2 + r2)
         sub             w5, w7, w5              // w5 = r2 - i2
         add             w7, w10, w14
         neg             w7, w7                  // w7 = -(i1 + r1)
         sub             w4, w14, w10            // w4 = r1 - i1

         // Pack the four results so one SQSHL/SQADD pair handles them all.
         mov             v1.s[0], w4
         mov             v1.s[1], w5
         mov             v1.s[2], w6
         mov             v1.s[3], w7
         lsl             w14, w3, #1             // byte offset of halfword j

         sqshl           v1.4s, v1.4s, v3.4s     // saturating << 9
         add             x17, x2, x14            // &out[j], before out advances

         sqadd           v2.4s, v1.4s, v0.4s     // saturating + 0x8000

         // Odd halfword lanes are the upper 16 bits of each 32-bit result.
         st1             {v2.h}[1], [x2], #2     // *out++  = hi16(lane 0)
         st1             {v2.h}[3], [x17]        // out[j]  = hi16(lane 1)
         add             x17, x12, x14           // &rev[j], before rev advances
         st1             {v2.h}[7], [x17]        // rev[j]  = hi16(lane 3)
         st1             {v2.h}[5], [x12], #2    // *rev++  = hi16(lane 2)

         subs            x3, x3, #2              // j -= 2
         b.ge            S_WITH_R_L5
     S_WITH_R_L6:
         pop_v_regs
         ret
     80