///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
///*******************************************************************************
//* @file
//*  ihevc_deblk_chroma_vert.s
//*
//* @brief
//*  contains function definitions for deblocking of a vertical chroma edge.
//*  functions are coded in neon assembly (GNU assembler syntax, AArch64)
//*
//* @author
//*  anand s
//*
//* @par list of functions:
//*  - ihevc_deblk_chroma_vert_av8()
//*
//* @remarks
//*  none
//*
//*******************************************************************************/
//void ihevc_deblk_chroma_vert(UWORD8 *pu1_src,
//                             WORD32 src_strd,
//                             WORD32 quant_param_p,
//                             WORD32 quant_param_q,
//                             WORD32 qp_offset_u,
//                             WORD32 qp_offset_v,
//                             WORD32 tc_offset_div2,
//                             WORD32 filter_flag_p,
//                             WORD32 filter_flag_q)

.text
.align 4
.include "ihevc_neon_macros.s"



.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table
.globl ihevc_deblk_chroma_vert_av8

.type ihevc_deblk_chroma_vert_av8, %function

//------------------------------------------------------------------------------
// ihevc_deblk_chroma_vert_av8
//
// Filters four rows across a vertical edge. For every row the eight bytes
// pu1_src[-4..3] are read, and the bytes pu1_src[-2..-1] and/or pu1_src[0..1]
// are rewritten depending on filter_flag_p / filter_flag_q.
//
// Per byte column c (c = 0 is pu1_src[-4]) the code evaluates
//     delta = clip(((q0 - p0) * 4 + p1 - q1 + 4) >> 3, -tc, tc)
//     p0'   = sat_u8(p0 + delta)          q0' = sat_u8(q0 - delta)
// with p1/p0/q0/q1 = bytes at offsets -4/-2/0/+2 for the even columns (tc
// derived from qp_offset_u) and -3/-1/+1/+3 for the odd columns (tc derived
// from qp_offset_v), i.e. presumably interleaved U/V samples.
//
// Arguments on entry (AAPCS64):
//     x0   = pu1_src            w1 = src_strd         w2 = quant_param_p
//     w3   = quant_param_q      w4 = qp_offset_u      w5 = qp_offset_v
//     w6   = tc_offset_div2     w7 = filter_flag_p    [sp] = filter_flag_q
//
// Register roles after the entry shuffle:
//     x1  = src_strd            x5  = tc_offset_div2  x6 = qp_offset_v
//     x7  = qp_offset_u         x12 = filter_flag_p   x4 = filter_flag_q
//
// Returns nothing. x0 is advanced by the q-side stores.
// Scratch: x2, x3, x5-x8, x12, x15, x20 (saved/restored), v0-v6, v16-v18,
// v29-v31, condition flags.
//------------------------------------------------------------------------------
ihevc_deblk_chroma_vert_av8:
    // The upper 32 bits of a 32-bit argument register are unspecified by the
    // procedure call standard, and every one of these values is consumed as a
    // full X register below (post-index stride, 64-bit adds/shifts, 64-bit
    // compare). Sign-extend all of them before use.
    sxtw        x1,w1                       // src_strd
    sxtw        x2,w2                       // quant_param_p
    sxtw        x3,w3                       // quant_param_q
    sxtw        x12,w7                      // x12 = filter_flag_p
    sxtw        x7,w4                       // x7  = qp_offset_u
    sxtw        x15,w5                      // x15 = qp_offset_v (temporary)
    sxtw        x5,w6                       // x5  = tc_offset_div2
    mov         x6,x15                      // x6  = qp_offset_v
    // First stack argument; read before the prologue below adjusts sp.
    // The load zero-extends, which is sufficient for the != 0 test later.
    ldr         w4, [sp]                    // x4  = filter_flag_q

    push_v_regs                             // macro from ihevc_neon_macros.s (not visible here)
    stp         x19, x20,[sp,#-16]!         // x20 is used as a scratch register

    // Load 4 rows x 8 bytes starting at pu1_src - 4 while the scalar side
    // builds (quant_param_p + quant_param_q + 1).
    sub         x8,x0,#4
    add         x2,x2,x3
    ld1         {v5.8b},[x8],x1             // row 0
    add         x2,x2,#1
    ld1         {v17.8b},[x8],x1            // row 1
    ld1         {v16.8b},[x8],x1            // row 2
    ld1         {v4.8b},[x8]                // row 3

    // 8x4 byte transpose, stage 1: interleave bytes of row pairs (0,1), (2,3).
    trn1        v29.8b, v5.8b, v17.8b
    trn2        v17.8b, v5.8b, v17.8b
    mov         v5.d[0], v29.d[0]
    // x3 = qp_offset_u + ((qp_p + qp_q + 1) >> 1); N flag consumed by bmi below
    adds        x3,x7,x2,asr #1
    trn1        v29.8b, v16.8b, v4.8b
    trn2        v4.8b, v16.8b, v4.8b
    mov         v16.d[0], v29.d[0]
    adrp        x7, :got:gai4_ihevc_qp_table
    ldr         x7, [x7, #:got_lo12:gai4_ihevc_qp_table]    // x7 = &gai4_ihevc_qp_table


    // qp (first index): idx < 0 -> idx, idx > 57 -> idx - 6, else table[idx]
    bmi         l1.2944
    cmp         x3,#57
    bgt         lbl78
    ldr         w3, [x7,x3,lsl #2]
    sxtw        x3,w3
lbl78:
    // Flags still come from the cmp above: gt only when the lookup was skipped.
    sub         x20,x3,#6
    csel        x3, x20, x3,gt
l1.2944:
    // Transpose stage 2: interleave 16-bit pairs.
    trn1        v29.4h, v5.4h, v16.4h
    trn2        v16.4h, v5.4h, v16.4h
    mov         v5.d[0], v29.d[0]
    // x2 = qp_offset_v + ((qp_p + qp_q + 1) >> 1)
    adds        x2,x6,x2,asr #1
    trn1        v29.4h, v17.4h, v4.4h
    trn2        v4.4h, v17.4h, v4.4h
    mov         v17.d[0], v29.d[0]
    // qp (second index): same mapping as above
    bmi         l1.2964
    cmp         x2,#57
    bgt         lbl86
    ldr         w2, [x7,x2,lsl #2]
    sxtw        x2,w2
lbl86:
    sub         x20,x2,#6
    csel        x2, x20, x2,gt
l1.2964:
    // Transpose stage 3: interleave 32-bit halves. Afterwards each register
    // holds two byte columns (4 rows each):
    //   v5  = columns 0,1 (offsets -4,-3)   v16 = columns 2,3 (offsets -2,-1)
    //   v17 = columns 4,5 (offsets  0,+1)   v4  = columns 6,7 (offsets +2,+3)
    trn1        v29.2s, v5.2s, v17.2s
    trn2        v17.2s, v5.2s, v17.2s
    mov         v5.d[0], v29.d[0]
    add         x3,x3,x5,lsl #1             // qp + 2 * tc_offset_div2
    trn1        v29.2s, v16.2s, v4.2s
    trn2        v4.2s, v16.2s, v4.2s
    mov         v16.d[0], v29.d[0]
    // First tc index: x3 = clip(x3 + 2, 0, 53)
    add         x6,x3,#2
    uxtl        v18.8h, v17.8b              // v18 = q0 widened to 16 bit
    cmp         x6,#53
    mov         x20,#53
    csel        x3, x20, x3,gt
    bgt         l1.2996
    adds        x6,x3,#2
    add         x20,x3,#2
    csel        x3, x20, x3,pl
    mov         x20,#0
    csel        x3, x20, x3,mi
l1.2996:
    usubl       v0.8h, v17.8b, v16.8b       // q0 - p0
    adrp        x6, :got:gai4_ihevc_tc_table
    ldr         x6, [x6, #:got_lo12:gai4_ihevc_tc_table]    // x6 = &gai4_ihevc_tc_table
    shl         v0.8h, v0.8h,#2             // (q0 - p0) * 4
    // Second tc index: x2 = clip(qp + 2 * tc_offset_div2 + 2, 0, 53)
    add         x2,x2,x5,lsl #1
    add         x5,x2,#2
    uaddw       v0.8h, v0.8h , v5.8b        // + p1
    cmp         x5,#53
    ldr         w3, [x6,x3,lsl #2]          // x3 = tc for the even columns
    sxtw        x3,w3
    usubw       v4.8h, v0.8h , v4.8b        // - q1
    mov         x20,#53
    csel        x2, x20, x2,gt
    bgt         l1.3036
    adds        x5,x2,#2
    add         x20,x2,#2
    csel        x2, x20, x2,pl
    mov         x20,#0
    csel        x2, x20, x2,mi
l1.3036:


    srshr       v6.8h, v4.8h,#3             // rounding shift: (... + 4) >> 3
    dup         v2.4h,w3                    // +tc, even columns
    ldr         w2, [x6,x2,lsl #2]          // x2 = tc for the odd columns
    sxtw        x2,w2
    neg         x3, x3
    // Flags set here are consumed by the beq before the p-side stores;
    // nothing in between writes the condition flags.
    cmp         x12,#0                      // filter_flag_p == 0 ?
    dup         v3.4h,w2                    // +tc, odd columns
    neg         x2, x2
    dup         v30.4h,w3                   // -tc, even columns
    dup         v31.4h,w2                   // -tc, odd columns

    mov         v30.d[1],v31.d[0]           // v30 = { -tc_even x4, -tc_odd x4 }
    mov         v2.d[1],v3.d[0]             // v2  = { +tc_even x4, +tc_odd x4 }

    smin        v4.8h, v6.8h , v2.8h
    smax        v2.8h, v30.8h , v4.8h       // v2 = delta clipped to [-tc, tc]

    uxtl        v6.8h, v16.8b               // p0 widened to 16 bit

    add         v0.8h, v6.8h , v2.8h        // p0 + delta
    sub         v2.8h, v18.8h , v2.8h       // q0 - delta
    sqxtun      v0.8b, v0.8h                // saturate to u8: new columns 2,3
    sub         x2,x0,#2                    // x2 = pu1_src - 2 (p-side store address)
    sqxtun      v1.8b, v2.8h                // saturate to u8: new columns 4,5
    // Transpose back so that each 16-bit lane is one row's byte pair:
    //   v0.h = { p row0, p row2, q row0, q row2 }
    //   v1.h = { p row1, p row3, q row1, q row3 }
    trn1        v29.2s, v0.2s, v1.2s
    trn2        v1.2s, v0.2s, v1.2s
    mov         v0.d[0], v29.d[0]
    trn1        v29.8b, v0.8b, v1.8b
    trn2        v1.8b, v0.8b, v1.8b
    mov         v0.d[0], v29.d[0]
    beq         l1.3204                     // skip p side when filter_flag_p == 0

    st1         {v0.h}[0],[x2],x1           // row 0: pu1_src[-2..-1]
    st1         {v1.h}[0],[x2],x1           // row 1
    st1         {v0.h}[1],[x2],x1           // row 2
    st1         {v1.h}[1],[x2]              // row 3
l1.3204:
    cmp         x4,#0                       // filter_flag_q == 0 ?
    beq         l1.3228
    st1         {v0.h}[2],[x0],x1           // row 0: pu1_src[0..1]
    st1         {v1.h}[2],[x0],x1           // row 1
    st1         {v0.h}[3],[x0],x1           // row 2
    st1         {v1.h}[3],[x0]              // row 3
l1.3228:
    ldp         x19, x20,[sp],#16
    pop_v_regs                              // macro from ihevc_neon_macros.s (not visible here)
    ret


