@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@/*******************************************************************************
@* @file
@*  ihevcd_fmt_conv_420sp_to_420sp.s
@*
@* @brief
@*  contains function definitions for format conversions
@*
@* @author
@*  ittiam
@*
@* @par list of functions:
@*  ihevcd_fmt_conv_420sp_to_420sp_a9q()
@*
@* @remarks
@*  GNU assembler syntax, ARM (A32) instruction set with NEON.
@*
@*******************************************************************************/
    .equ DO1STROUNDING, 0

@ ARM
@
@ PRESERVE8

@ Offsets of the stack-passed arguments relative to sp. They are valid only
@ after the prologue has pushed r4-r12 and lr (10 registers = 40 bytes).
    .equ WIDTH_OFFSET,          40
    .equ HEIGHT_OFFSET,         44
    .equ SRC_STRIDE_Y_OFFSET,   48
    .equ SRC_STRIDE_UV_OFFSET,  52
    .equ DST_STRIDE_Y_OFFSET,   56
    .equ DST_STRIDE_UV_OFFSET,  60

@ Number of bytes moved by one NEON load/store pair in each plane.
    .equ Y_CHUNK,               32
    .equ UV_CHUNK,              16

    .text
    .p2align 2

@/*****************************************************************************
@*                                                                            *
@*  Function Name : ihevcd_fmt_conv_420sp_to_420sp_a9q()                      *
@*                                                                            *
@*  Description   : Copies a YUV420SP image (UV interleaved) into another     *
@*                  YUV420SP buffer. The luma plane is copied row by row,     *
@*                  followed by height/2 rows of the chroma plane. Each       *
@*                  chroma row is copied as 'width' bytes.                    *
@*                                                                            *
@*  Arguments     : R0                          pu1_y                         *
@*                  R1                          pu1_uv                        *
@*                  R2                          pu1_dest_y                    *
@*                  R3                          pu1_dest_uv                   *
@*                  [sp, #WIDTH_OFFSET]         u2_width                      *
@*                  [sp, #HEIGHT_OFFSET]        u2_height                     *
@*                  [sp, #SRC_STRIDE_Y_OFFSET]  u2_stridey                    *
@*                  [sp, #SRC_STRIDE_UV_OFFSET] u2_stridechroma               *
@*                  [sp, #DST_STRIDE_Y_OFFSET]  u2_dest_stridey               *
@*                  [sp, #DST_STRIDE_UV_OFFSET] u2_dest_stridechroma          *
@*                  (offsets are relative to sp after the prologue push)      *
@*                                                                            *
@*  Values Returned : None                                                    *
@*                                                                            *
@*  Register Usage  : R0 - R12, D0 - D3                                       *
@*                    (R4 - R12 and LR are saved and restored)                *
@*                                                                            *
@*  Stack Usage     : 40 Bytes                                                *
@*                                                                            *
@*  Interruptibility: Interruptible                                           *
@*                                                                            *
@*  Known Limitations                                                         *
@*       Assumptions: Image Width:  Assumed to be multiple of 2 and           *
@*                    Image Height: Assumed to be even.                       *
@*                                                                            *
@*  Notes           : - Width and height are compared as signed values. If    *
@*                      either is <= 0 nothing is copied.                     *
@*                    - If height/2 is 0 the chroma plane is not touched.     *
@*                    - A row of at least one chunk (32 bytes luma, 16 bytes  *
@*                      chroma) whose width is not a multiple of the chunk    *
@*                      is finished by stepping the pointers back and         *
@*                      copying one more full chunk that ends exactly at the  *
@*                      end of the row; some bytes are copied twice.          *
@*                    - A row narrower than one chunk is copied byte by byte  *
@*                      so that no byte outside the row is read or written.   *
@*                                                                            *
@*  Revision History  :                                                       *
@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
@*         16 05 2012   Naveen SR       draft                                 *
@*                                                                            *
@*****************************************************************************/

    .global ihevcd_fmt_conv_420sp_to_420sp_a9q
    .type   ihevcd_fmt_conv_420sp_to_420sp_a9q, %function
ihevcd_fmt_conv_420sp_to_420sp_a9q:

    STMFD   sp!, {r4-r12, lr}

    LDR     r8, [sp, #WIDTH_OFFSET]         @ r8  = u2_width
    LDR     r9, [sp, #HEIGHT_OFFSET]        @ r9  = u2_height
    LDR     r7, [sp, #SRC_STRIDE_Y_OFFSET]  @ r7  = u2_stridey
    LDR     r5, [sp, #DST_STRIDE_Y_OFFSET]  @ r5  = u2_dest_stridey

    @ The row loops below test their counters at the bottom, so an empty
    @ image has to be rejected before entering them.
    CMP     r8, #0
    BLE     .Lfmt_conv_exit
    CMP     r9, #0
    BLE     .Lfmt_conv_exit

    SUB     r10, r7, r8                     @ r10 = src bytes to skip after a row
    SUB     r11, r5, r8                     @ r11 = dst bytes to skip after a row

@/* Copy Y */

    MOV     r4, r9                          @ r4 = luma rows remaining (> 0)

.Ly_row_loop:
    MOV     r6, r8                          @ r6 = bytes remaining in this row
    CMP     r6, #Y_CHUNK
    BLT     .Ly_narrow_row                  @ row shorter than one chunk

.Ly_chunk_loop:                             @ invariant: r6 >= Y_CHUNK here
    PLD     [r0, #128]
    SUB     r6, r6, #Y_CHUNK
    VLD1.8  {d0, d1, d2, d3}, [r0]!
    VST1.8  {d0, d1, d2, d3}, [r2]!
    CMP     r6, #Y_CHUNK
    BGE     .Ly_chunk_loop
    CMP     r6, #0
    BEQ     .Ly_row_done

    @ 0 < r6 < Y_CHUNK bytes are left. Step both pointers back by
    @ (Y_CHUNK - r6) so that one more full chunk ends exactly at the end of
    @ the row. Ex: if width is 162 the loop above copies 160 bytes, both
    @ pointers are moved back to byte 130 and bytes 130..161 are copied.
    @ At least one full chunk was already copied, so the pointers stay
    @ inside the row.
    RSB     r6, r6, #Y_CHUNK
    SUB     r0, r0, r6
    SUB     r2, r2, r6
    VLD1.8  {d0, d1, d2, d3}, [r0]!
    VST1.8  {d0, d1, d2, d3}, [r2]!
    B       .Ly_row_done

.Ly_narrow_row:                             @ 0 < r6 < Y_CHUNK, copy bytewise
    LDRB    r12, [r0], #1
    STRB    r12, [r2], #1
    SUBS    r6, r6, #1
    BGT     .Ly_narrow_row

.Ly_row_done:
    ADD     r0, r0, r10                     @ advance to start of next src row
    ADD     r2, r2, r11                     @ advance to start of next dst row
    SUBS    r4, r4, #1
    BGT     .Ly_row_loop

@/* Copy UV */

    MOVS    r9, r9, LSR #1                  @ r9 = height / 2 (chroma rows)
    BEQ     .Lfmt_conv_exit                 @ no chroma rows to copy

    LDR     r5, [sp, #DST_STRIDE_UV_OFFSET] @ r5 = u2_dest_stridechroma
    LDR     r7, [sp, #SRC_STRIDE_UV_OFFSET] @ r7 = u2_stridechroma

    @ The width is not halved: each interleaved UV row is copied as
    @ 'width' bytes.
    MOV     r2, r3                          @ r2 = pu1_dest_uv

    SUB     r10, r7, r8                     @ r10 = src bytes to skip after a row
    SUB     r11, r5, r8                     @ r11 = dst bytes to skip after a row

    MOV     r4, r9                          @ r4 = chroma rows remaining (> 0)

.Luv_row_loop:
    MOV     r6, r8                          @ r6 = bytes remaining in this row
    CMP     r6, #UV_CHUNK
    BLT     .Luv_narrow_row                 @ row shorter than one chunk

.Luv_chunk_loop:                            @ invariant: r6 >= UV_CHUNK here
    PLD     [r1, #128]
    SUB     r6, r6, #UV_CHUNK
    VLD1.8  {d0, d1}, [r1]!
    VST1.8  {d0, d1}, [r2]!
    CMP     r6, #UV_CHUNK
    BGE     .Luv_chunk_loop
    CMP     r6, #0
    BEQ     .Luv_row_done

    @ 0 < r6 < UV_CHUNK bytes are left. Step both pointers back by
    @ (UV_CHUNK - r6) so that one more full chunk ends exactly at the end
    @ of the row. Ex: if width is 162 the loop above copies 160 bytes, both
    @ pointers are moved back to byte 146 and bytes 146..161 are copied.
    RSB     r6, r6, #UV_CHUNK
    SUB     r1, r1, r6
    SUB     r2, r2, r6
    VLD1.8  {d0, d1}, [r1]!
    VST1.8  {d0, d1}, [r2]!
    B       .Luv_row_done

.Luv_narrow_row:                            @ 0 < r6 < UV_CHUNK, copy bytewise
    LDRB    r12, [r1], #1
    STRB    r12, [r2], #1
    SUBS    r6, r6, #1
    BGT     .Luv_narrow_row

.Luv_row_done:
    ADD     r1, r1, r10                     @ advance to start of next src row
    ADD     r2, r2, r11                     @ advance to start of next dst row
    SUBS    r4, r4, #1
    BGT     .Luv_row_loop

.Lfmt_conv_exit:
    LDMFD   sp!, {r4-r12, pc}


    .section .note.GNU-stack,"",%progbits
