@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@/*******************************************************************************
@* @file
@*  ihevcd_fmt_conv_420sp_to_420p.s
@*
@* @brief
@*  contains function definitions for format conversions
@*
@* @author
@*  ittiam
@*
@* @par list of functions:
@*  ihevcd_fmt_conv_420sp_to_420p_a9q()
@*
@* @remarks
@*  none
@*
@*******************************************************************************/

.text

@/*****************************************************************************
@*                                                                            *
@*  Function Name    : ihevcd_fmt_conv_420sp_to_420p_a9q()                    *
@*                                                                            *
@*  Description      : Copies the luma plane and de-interleaves the chroma    *
@*                     plane, i.e. converts an image from YUV420SP            *
@*                     (semi-planar, UV interleaved) to YUV420P (separate     *
@*                     U and V planes).                                       *
@*                                                                            *
@*  Arguments        : R0           pu1_src_y                                 *
@*                     R1           pu1_src_uv                                *
@*                     R2           pu1_dest_y                                *
@*                     R3           pu1_dest_u                                *
@*                     [R13 #40]    pu1_dest_v                                *
@*                     [R13 #44]    u2_width                                  *
@*                     [R13 #48]    u2_height                                 *
@*                     [R13 #52]    u2_stridey                                *
@*                     [R13 #56]    u2_strideuv                               *
@*                     [R13 #60]    u2_dest_stridey                           *
@*                     [R13 #64]    u2_dest_strideuv                          *
@*                     [R13 #68]    is_u_first                                *
@*                     [R13 #72]    disable_luma_copy                         *
@*                     (stack offsets are relative to SP after the 40-byte    *
@*                      register push done on entry)                          *
@*                                                                            *
@*  Values Returned  : None                                                   *
@*                                                                            *
@*  Register Usage   : R0 - R12, D0, D1                                       *
@*                                                                            *
@*  Stack Usage      : 40 Bytes                                               *
@*                                                                            *
@*  Interruptibility : Interruptible                                          *
@*                                                                            *
@*  Known Limitations                                                         *
@*       Assumptions: Image Width:     Assumed to be multiple of 2 and        *
@*                    Image Height:    Assumed to be even.                    *
@*       Notes      : Nothing is copied when width or height is <= 0.         *
@*                    Rows narrower than 16 bytes are copied byte by byte;    *
@*                    wider rows use 16-byte NEON copies, with one            *
@*                    overlapping 16-byte copy for the remainder.             *
@*                                                                            *
@*  Revision History :                                                        *
@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
@*         16 05 2012   Naveen SR       draft                                 *
@*                                                                            *
@*****************************************************************************/

    .globl ihevcd_fmt_conv_420sp_to_420p_a9q

    .type ihevcd_fmt_conv_420sp_to_420p_a9q, %function

ihevcd_fmt_conv_420sp_to_420p_a9q:
    STMFD       sp!,{r4-r12, lr}

    LDR         r5,[sp,#60]             @//Load u2_dest_stridey
    LDR         r7,[sp,#52]             @//Load u2_stridey
    LDR         r8,[sp,#44]             @//Load u2_width
    LDR         r9,[sp,#48]             @//Load u2_height

    @//The row loops below are do-while loops, so an empty picture must be
    @//rejected up front or one row would still be copied.
    CMP         r8,#0
    BLE         exit
    CMP         r9,#0
    BLE         exit

    SUB         r10,r7,r8               @// Src Y increment (stride - width)
    SUB         r11,r5,r8               @// Dst Y increment (stride - width)

    LDR         r5,[sp,#72]             @//Load disable_luma_copy flag
    CMP         r5,#0                   @//skip luma if disable_luma_copy is non-zero
    BNE         uv_copy_start

    @/* Copy Y */

    MOV         r4,r9                   @// Copying height
y_row_loop:
    MOV         r6,r8                   @// Copying width
    CMP         r6,#16                  @//Fewer than 16 bytes in the row: the
    BLT         y_narrow_row            @//back-off tail below would underrun

y_col_loop:

    SUB         r6,r6,#16
    vld1.8      {d0,d1},[r0]!
    vst1.8      {d0,d1},[r2]!
    CMP         r6,#16
    BGE         y_col_loop
    CMP         r6,#0
    BEQ         y_col_loop_end
    @//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
    @//Ex if width is 162, above loop will process 160 pixels. And
    @//Both source and destination will point to 146th pixel and then 16 bytes will be read
    @// and written using VLD1 and VST1
    RSB         r6,r6,#16               @// r6 = 16 - remaining bytes
    SUB         r0,r0,r6
    SUB         r2,r2,r6
    vld1.8      {d0,d1}, [r0]!
    vst1.8      {d0,d1}, [r2]!

y_col_loop_end:
    ADD         r0, r0, r10             @// Advance source to the next row
    ADD         r2, r2, r11             @// Advance destination to the next row
    SUBS        r4, r4, #1
    BGT         y_row_loop


    @/* Copy UV */
uv_copy_start:

    LDR         r5,[sp,#64]             @//Load u2_dest_strideuv
    LDR         r7,[sp,#56]             @//Load u2_strideuv

    MOV         r9,r9,LSR #1            @// height/2

    SUB         r10,r7,r8               @// Src UV increment (interleaved row is width bytes)
    MOV         r11,r8,LSR #1
    SUB         r11,r5,r11              @// Dst U and V increment (each row is width/2 bytes)

    LDR         r5,[sp,#40]             @//Load pu1_dest_v

    LDR         r4,[sp,#68]             @//Load is_u_first_flag
    CMP         r4,#0                   @//Swap U and V dest if is_u_first_flag is zero
    MOVEQ       r4,r5
    MOVEQ       r5,r3
    MOVEQ       r3,r4

    MOV         r4,r9                   @// Copying height
uv_row_loop:
    MOV         r6,r8                   @// Copying width
    CMP         r6,#16                  @//Fewer than 16 bytes in the row: the
    BLT         uv_narrow_row           @//back-off tail below would underrun

uv_col_loop:

    SUB         r6,r6,#16

    PLD         [r1,#128]
    vld2.8      {d0,d1},[r1]!           @// d0 = even bytes, d1 = odd bytes
    vst1.8      {d0},[r3]!
    vst1.8      {d1},[r5]!
    CMP         r6,#16
    BGE         uv_col_loop
    CMP         r6,#0
    BEQ         uv_col_loop_end
    @//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
    @//Ex if width is 162, above loop will process 160 bytes. And
    @//source will point to the 146th byte and then 16 bytes will be read
    @// and written using VLD2 and VST1 (8 bytes to each destination plane)
    RSB         r6,r6,#16               @// r6 = 16 - remaining bytes
    SUB         r1,r1,r6
    SUB         r3,r3,r6,LSR #1         @// Each destination backs off by half as much
    SUB         r5,r5,r6,LSR #1
    vld2.8      {d0,d1}, [r1]!
    vst1.8      {d0}, [r3]!
    vst1.8      {d1}, [r5]!
uv_col_loop_end:
    ADD         r1, r1, r10             @// Advance source to the next row
    ADD         r3, r3, r11             @// Advance both destinations to the next row
    ADD         r5, r5, r11
    SUBS        r4, r4, #1
    BGT         uv_row_loop

exit:
    LDMFD       sp!,{r4-r12, pc}

    @/* Out-of-line paths for rows narrower than 16 bytes */

    @//Luma: byte-wise copy of r6 bytes. r6 >= 1 here because width > 0 was
    @//checked on entry.
y_narrow_row:
    LDRB        r12,[r0],#1
    STRB        r12,[r2],#1
    SUBS        r6,r6,#1
    BGT         y_narrow_row
    B           y_col_loop_end

    @//Chroma: split one interleaved byte pair per iteration while at least
    @//two bytes remain in the row.
uv_narrow_row:
    CMP         r6,#2
    BLT         uv_col_loop_end
    LDRB        r12,[r1],#1
    STRB        r12,[r3],#1             @// even byte -> first destination plane
    LDRB        r12,[r1],#1
    STRB        r12,[r5],#1             @// odd byte -> second destination plane
    SUB         r6,r6,#2
    B           uv_narrow_row