///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
///*******************************************************************************
//* //file
//*  ihevcd_fmt_conv_420sp_to_420sp.s
//*
//* //brief
//*  contains function definitions for format conversions
//*
//* //author
//*  ittiam
//*
//* //par list of functions:
//*  ihevcd_fmt_conv_420sp_to_420sp_av8
//*
//* //remarks
//*  Target: AArch64, GNU as syntax, AAPCS64 calling convention
//*
//*******************************************************************************/
    .equ DO1STROUNDING, 0

    // Offsets of the two stack-passed arguments, measured from sp AFTER the
    // prologue below.  80 = 16 bytes for the x19/x20 pair + the bytes pushed
    // by push_v_regs (presumably 64; the macro lives in ihevc_neon_macros.s
    // and is not visible here -- values are kept from the original code).
    .equ ARG_DST_STRIDE_Y,  80
    .equ ARG_DST_STRIDE_UV, 88

    .text
    .p2align 2

    .include "ihevc_neon_macros.s"


///*****************************************************************************
//*
//*  Function Name : ihevcd_fmt_conv_420sp_to_420sp_av8()
//*
//*  Description   : Copies a YUV 4:2:0 semi-planar image (Y plane followed
//*                  by an interleaved UV plane) into another 4:2:0
//*                  semi-planar buffer that may use different row strides.
//*
//*  Arguments     : x0          pu1_y                 source luma
//*                  x1          pu1_uv                source chroma (UV)
//*                  x2          pu1_dest_y            destination luma
//*                  x3          pu1_dest_uv           destination chroma
//*                  w4          u2_width              bytes copied per row
//*                  w5          u2_height             luma rows
//*                  w6          u2_stridey            source luma stride
//*                  w7          u2_stridechroma       source chroma stride
//*                  [sp, #0]    u2_dest_stridey       (at function entry)
//*                  [sp, #8]    u2_dest_stridechroma  (at function entry)
//*
//*                  All six size/stride arguments are read as 32-bit values
//*                  and sign-extended to 64 bits before use.
//*
//*  Values Returned : None
//*
//*  Register Usage  : x0 - x11, x20, v0 - v3 (x19/x20 are saved/restored)
//*
//*  Known Limitations / Assumptions (follow from the loop structure below):
//*      - Each luma row is copied in 32-byte chunks and each chroma row in
//*        16-byte chunks; the column loops always execute at least once.
//*        Width must therefore be >= 32, otherwise bytes outside the row
//*        are read and written.
//*      - A width that is not a multiple of the chunk size is handled by
//*        re-copying one overlapping chunk that ends exactly at the row end.
//*      - The row loops also execute at least once, so height must be >= 2.
//*      - Chroma rows copied = height / 2 (height assumed even).
//*
//*  Revision History :
//*     DD MM YYYY   Author(s)    Changes (Describe the changes made)
//*     16 05 2012   Naveen SR    draft
//*
//*****************************************************************************/

    .global ihevcd_fmt_conv_420sp_to_420sp_av8
    // The symbol typed here must be the one actually defined below.
    .type   ihevcd_fmt_conv_420sp_to_420sp_av8, %function
ihevcd_fmt_conv_420sp_to_420sp_av8:

    // NOTE(review): only v0-v3 are touched in this routine, so the vector
    // register save looks unnecessary; it is retained because the stack
    // argument offsets above depend on it.
    push_v_regs
    stp     x19, x20, [sp, #-16]!           // x20 is used as scratch below

    // The upper 32 bits of w4-w7 are unspecified on entry under AAPCS64,
    // so widen them explicitly, exactly as is done for the stack arguments.
    sxtw    x8,  w4                         // x8  = width
    sxtw    x9,  w5                         // x9  = height
    sxtw    x10, w6                         // x10 = source luma stride
    sxtw    x7,  w7                         // x7  = source chroma stride,
                                            //       kept live for the UV pass
    ldrsw   x11, [sp, #ARG_DST_STRIDE_Y]    // x11 = destination luma stride

    sub     x10, x10, x8                    // src Y: stride - width
    sub     x11, x11, x8                    // dst Y: stride - width

    ///* Copy Y */

    mov     x4, x9                          // x4 = luma rows remaining
.Ly_row_loop:
    mov     x6, x8                          // x6 = bytes remaining in row

.Ly_col_loop:
    prfm    PLDL1KEEP, [x0, #128]
    sub     x6, x6, #32
    ld1     {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], #32
    st1     {v0.8b, v1.8b, v2.8b, v3.8b}, [x2], #32
    cmp     x6, #32
    bge     .Ly_col_loop                    // another full 32-byte chunk fits
    cbz     x6, .Ly_row_end                 // width was a multiple of 32

    // 1..31 bytes are left.  Step both pointers back by (32 - remaining) so
    // that one more 32-byte copy ends exactly on the last byte of the row.
    // Example: width 162 -> the loop copies 160 bytes, 2 remain, the
    // pointers move back 30 bytes to offset 130 and bytes 130..161 are
    // copied (bytes 130..159 for the second time).
    mov     x20, #32
    sub     x6, x20, x6                     // x6 = 32 - remaining
    sub     x0, x0, x6
    sub     x2, x2, x6
    ld1     {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], #32
    st1     {v0.8b, v1.8b, v2.8b, v3.8b}, [x2], #32

.Ly_row_end:
    add     x0, x0, x10                     // advance to next source row
    add     x2, x2, x11                     // advance to next destination row
    subs    x4, x4, #1
    bgt     .Ly_row_loop

    ///* Copy UV */

    ldrsw   x11, [sp, #ARG_DST_STRIDE_UV]   // x11 = destination chroma stride
    lsr     x9, x9, #1                      // chroma rows = height / 2
    mov     x2, x3                          // x2 = pu1_dest_uv

    // Use the source CHROMA stride here (the luma stride is only correct
    // when both planes happen to share the same stride).
    sub     x10, x7, x8                     // src UV: stride - width
    sub     x11, x11, x8                    // dst UV: stride - width

    mov     x4, x9                          // x4 = chroma rows remaining
.Luv_row_loop:
    mov     x6, x8                          // x6 = bytes remaining in row

.Luv_col_loop:
    prfm    PLDL1KEEP, [x1, #128]
    sub     x6, x6, #16
    ld1     {v0.8b, v1.8b}, [x1], #16
    st1     {v0.8b, v1.8b}, [x2], #16
    cmp     x6, #16
    bge     .Luv_col_loop                   // another full 16-byte chunk fits
    cbz     x6, .Luv_row_end                // width was a multiple of 16

    // 1..15 bytes are left: step back by (16 - remaining) and copy one
    // overlapping 16-byte chunk that ends exactly at the end of the row.
    mov     x20, #16
    sub     x6, x20, x6                     // x6 = 16 - remaining
    sub     x1, x1, x6
    sub     x2, x2, x6
    ld1     {v0.8b, v1.8b}, [x1], #16
    st1     {v0.8b, v1.8b}, [x2], #16

.Luv_row_end:
    add     x1, x1, x10                     // advance to next source row
    add     x2, x2, x11                     // advance to next destination row
    subs    x4, x4, #1
    bgt     .Luv_row_loop

    ldp     x19, x20, [sp], #16
    pop_v_regs
    ret


    .section .note.GNU-stack,"",%progbits
