@
@ Copyright (C) 2009 The Android Open Source Project
@
@ Licensed under the Apache License, Version 2.0 (the "License");
@ you may not use this file except in compliance with the License.
@ You may obtain a copy of the License at
@
@      http://www.apache.org/licenses/LICENSE-2.0
@
@ Unless required by applicable law or agreed to in writing, software
@ distributed under the License is distributed on an "AS IS" BASIS,
@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ See the License for the specific language governing permissions and
@ limitations under the License.
@

#include "asm_common.S"

    require8
    preserve8

    .arm
    .fpu neon
    .text

/* Input / output registers */
#define image   r0
#define data    r1
#define width   r2
#define luma    r3
#define cb      r4
#define cr      r5
#define cwidth  r6

/* Byte offsets of the words this routine reads from the structure that
   image points to. Each value is loaded into the register of the same
   role (width, luma, cb, cr). */
#define IMAGE_OFFS_WIDTH    0x04
#define IMAGE_OFFS_LUMA     0x0C
#define IMAGE_OFFS_CB       0x10
#define IMAGE_OFFS_CR       0x14

/* -- NEON registers -- */

#define qRow0   Q0.U8
#define qRow1   Q1.U8
#define qRow2   Q2.U8
#define qRow3   Q3.U8
#define qRow4   Q4.U8
#define qRow5   Q5.U8
#define qRow6   Q6.U8
#define qRow7   Q7.U8
#define qRow8   Q8.U8
#define qRow9   Q9.U8
#define qRow10  Q10.U8
#define qRow11  Q11.U8
#define qRow12  Q12.U8
#define qRow13  Q13.U8
#define qRow14  Q14.U8
#define qRow15  Q15.U8

#define dRow0   D0.U8
#define dRow1   D1.U8
#define dRow2   D2.U8
#define dRow3   D3.U8
#define dRow4   D4.U8
#define dRow5   D5.U8
#define dRow6   D6.U8
#define dRow7   D7.U8
#define dRow8   D8.U8
#define dRow9   D9.U8
#define dRow10  D10.U8
#define dRow11  D11.U8
#define dRow12  D12.U8
#define dRow13  D13.U8
#define dRow14  D14.U8
#define dRow15  D15.U8

/*------------------------------------------------------------------------------

    Function: h264bsdWriteMacroblock

        Functional description:
            Write one macroblock into the image. Both luma and chroma
            components will be written at the same time.

        Inputs:
            data    pointer to macroblock data to be written, 256 values for
                    luma followed by 64 values for both chroma components

        Outputs:
            image   pointer to the image where the macroblock will be written

        Returns:
            none

        Register usage:
            r0 (image) is only read. r1 (data) is advanced past the 384
            bytes consumed; r2 (width) and r3 (luma) are overwritten.
            r4-r6 and q4-q7 are saved on entry and restored on exit.
            q0-q3 and q8-q15 are overwritten and not restored.

        Strides:
            The word read from image + IMAGE_OFFS_WIDTH is multiplied by 16
            to form the luma row stride in bytes (NOTE(review): this suggests
            the field is a width in macroblocks; confirm against the C
            definition of the structure). The chroma row stride is half of
            the luma stride.

        Alignment:
            Luma rows are stored with a 128-bit alignment qualifier and
            chroma rows with a 64-bit one, so the luma destination must be
            16-byte aligned and the cb / cr destinations 8-byte aligned.
            The loads from data carry no alignment qualifier.

------------------------------------------------------------------------------*/

function h264bsdWriteMacroblock, export=1
    PUSH    {r4-r6,lr}              @ 16 bytes: sp stays 8-byte aligned
    VPUSH   {q4-q7}                 @ q4-q7 are reused below as row buffers

    LDR     width, [image, #IMAGE_OFFS_WIDTH]
    LDR     luma,  [image, #IMAGE_OFFS_LUMA]
    LDR     cb,    [image, #IMAGE_OFFS_CB]
    LDR     cr,    [image, #IMAGE_OFFS_CR]

    /* Loads and stores below are interleaved on purpose. Keep the order:
       a Q register is reloaded with chroma only after the luma row it
       held has been stored. */

    @ Write luma: 16 rows of 16 bytes, two rows per load
    VLD1    {qRow0, qRow1}, [data]!
    LSL     width, width, #4        @ luma stride in bytes = 16 * width field
    VLD1    {qRow2, qRow3}, [data]!
    LSR     cwidth, width, #1       @ chroma stride = luma stride / 2
    VST1    {qRow0}, [luma,:128], width
    VLD1    {qRow4, qRow5}, [data]!
    VST1    {qRow1}, [luma,:128], width
    VLD1    {qRow6, qRow7}, [data]!
    VST1    {qRow2}, [luma,:128], width
    VLD1    {qRow8, qRow9}, [data]!
    VST1    {qRow3}, [luma,:128], width
    VLD1    {qRow10, qRow11}, [data]!
    VST1    {qRow4}, [luma,:128], width
    VLD1    {qRow12, qRow13}, [data]!
    VST1    {qRow5}, [luma,:128], width
    VLD1    {qRow14, qRow15}, [data]!
    VST1    {qRow6}, [luma,:128], width

    /* Chroma: 8 rows of 8 bytes per plane, four rows per load.
       Afterwards d0-d7 hold the first chroma plane (rows 0-7) and
       d8-d15 hold the second one (rows 0-7). */
    VLD1    {qRow0, qRow1}, [data]!         @ cb rows 0,1,2,3
    VST1    {qRow7}, [luma,:128], width
    VLD1    {qRow2, qRow3}, [data]!         @ cb rows 4,5,6,7
    VST1    {qRow8}, [luma,:128], width
    VLD1    {qRow4, qRow5}, [data]!         @ cr rows 0,1,2,3
    VST1    {qRow9}, [luma,:128], width
    VLD1    {qRow6, qRow7}, [data]!         @ cr rows 4,5,6,7
    VST1    {qRow10}, [luma,:128], width
    VST1    {dRow0}, [cb,:64], cwidth
    VST1    {dRow8}, [cr,:64], cwidth
    VST1    {qRow11}, [luma,:128], width
    VST1    {dRow1}, [cb,:64], cwidth
    VST1    {dRow9}, [cr,:64], cwidth
    VST1    {qRow12}, [luma,:128], width
    VST1    {dRow2}, [cb,:64], cwidth
    VST1    {dRow10}, [cr,:64], cwidth
    VST1    {qRow13}, [luma,:128], width
    VST1    {dRow3}, [cb,:64], cwidth
    VST1    {dRow11}, [cr,:64], cwidth
    VST1    {qRow14}, [luma,:128], width
    VST1    {dRow4}, [cb,:64], cwidth
    VST1    {dRow12}, [cr,:64], cwidth
    /* Last luma row, no post-increment. The address is the previous
       128-bit aligned row address plus a stride that is a multiple of 16,
       so the same alignment qualifier is valid here. */
    VST1    {qRow15}, [luma,:128]
    VST1    {dRow5}, [cb,:64], cwidth
    VST1    {dRow13}, [cr,:64], cwidth
    VST1    {dRow6}, [cb,:64], cwidth
    VST1    {dRow14}, [cr,:64], cwidth
    VST1    {dRow7}, [cb,:64]               @ last chroma rows, no post-increment
    VST1    {dRow15}, [cr,:64]

    VPOP    {q4-q7}
    POP     {r4-r6,pc}              @ return by popping the saved lr into pc

    .endfunc