;
; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;

; Syntax: ARM armasm.  Target: ARM core with NEON.

        REQUIRE8
        PRESERVE8

        AREA    |.text|, CODE

        EXPORT  h264bsdWriteMacroblock

; -- Byte offsets of the fields read through the image pointer --
; NOTE(review): these must match the C-side structure layout, which is not
; visible from this file.

IMAGE_WIDTH_OFFSET  EQU     0x04        ; picture width (scaled by 16 below)
IMAGE_LUMA_OFFSET   EQU     0x0C        ; luma destination pointer
IMAGE_CB_OFFSET     EQU     0x10        ; cb destination pointer
IMAGE_CR_OFFSET     EQU     0x14        ; cr destination pointer

; -- Core registers --

image       RN 0                        ; in:  image structure (not modified)
data        RN 1                        ; in:  macroblock samples, advanced by every load
lumaStep    RN 2                        ; width, then byte step between luma rows
lumaDst     RN 3                        ; running luma write pointer
cbDst       RN 4                        ; running cb write pointer
crDst       RN 5                        ; running cr write pointer
chromaStep  RN 6                        ; byte step between chroma rows (lumaStep / 2)

; -- NEON registers --

; One quadword per 16-byte luma row.
qLuma0      QN Q0.U8
qLuma1      QN Q1.U8
qLuma2      QN Q2.U8
qLuma3      QN Q3.U8
qLuma4      QN Q4.U8
qLuma5      QN Q5.U8
qLuma6      QN Q6.U8
qLuma7      QN Q7.U8
qLuma8      QN Q8.U8
qLuma9      QN Q9.U8
qLuma10     QN Q10.U8
qLuma11     QN Q11.U8
qLuma12     QN Q12.U8
qLuma13     QN Q13.U8
qLuma14     QN Q14.U8
qLuma15     QN Q15.U8

; Q0-Q7 are reused for chroma once their luma rows have been stored.
; Each quadword holds two 8-byte chroma rows.
qCb01       QN Q0.U8
qCb23       QN Q1.U8
qCb45       QN Q2.U8
qCb67       QN Q3.U8
qCr01       QN Q4.U8
qCr23       QN Q5.U8
qCr45       QN Q6.U8
qCr67       QN Q7.U8

; Doubleword views of the same registers: one 8-byte chroma row each.
dCb0        DN D0.U8
dCb1        DN D1.U8
dCb2        DN D2.U8
dCb3        DN D3.U8
dCb4        DN D4.U8
dCb5        DN D5.U8
dCb6        DN D6.U8
dCb7        DN D7.U8

dCr0        DN D8.U8
dCr1        DN D9.U8
dCr2        DN D10.U8
dCr3        DN D11.U8
dCr4        DN D12.U8
dCr5        DN D13.U8
dCr6        DN D14.U8
dCr7        DN D15.U8

;/*------------------------------------------------------------------------------
;
;   Function: h264bsdWriteMacroblock
;
;       Functional description:
;           Copy one macroblock from a packed buffer into the image: 16 luma
;           rows of 16 bytes, then 8 cb rows and 8 cr rows of 8 bytes each.
;           Luma and chroma rows are written by the same straight-line
;           sequence.
;
;       Inputs:
;           r0 (image)  pointer to the image structure; the width and the
;                       luma/cb/cr destination pointers are read from it
;           r1 (data)   pointer to the macroblock data: 256 luma bytes,
;                       followed by 64 cb bytes, followed by 64 cr bytes
;
;       Outputs:
;           the memory behind the luma, cb and cr pointers of the image
;
;       Returns:
;           none
;
;       Registers:
;           r4-r6 and q4-q7 are saved on entry and restored on exit.
;           r1-r3, q0-q3 and q8-q15 are overwritten.
;
;       Alignment:
;           Luma stores carry a 128-bit and chroma stores a 64-bit alignment
;           qualifier, so the destination pointers must be 16-byte (luma) and
;           8-byte (cb, cr) aligned. The row steps are multiples of 16 and 8,
;           so alignment is kept from row to row. Loads from data carry no
;           alignment qualifier.
;
;------------------------------------------------------------------------------*/

h264bsdWriteMacroblock
        PUSH    {r4-r6,lr}
        VPUSH   {q4-q7}

        LDR     lumaStep, [image, #IMAGE_WIDTH_OFFSET]
        LDR     lumaDst, [image, #IMAGE_LUMA_OFFSET]
        LDR     cbDst, [image, #IMAGE_CB_OFFSET]
        LDR     crDst, [image, #IMAGE_CR_OFFSET]

        ; Loads and stores are interleaved below. A register pair is only
        ; reloaded with chroma after both of its luma rows have been stored,
        ; so the instruction order must be kept.
        ; NOTE(review): the interleave presumably hides load latency - confirm
        ; on the target core before rescheduling.

        ; Luma rows 0-6: stream the 256 luma bytes in, 32 bytes per load.
        VLD1    {qLuma0, qLuma1}, [data]!
        LSL     lumaStep, lumaStep, #4          ; width * 16 = bytes between luma rows
        VLD1    {qLuma2, qLuma3}, [data]!
        LSR     chromaStep, lumaStep, #1        ; width * 8 = bytes between chroma rows
        VST1    {qLuma0}, [lumaDst@128], lumaStep
        VLD1    {qLuma4, qLuma5}, [data]!
        VST1    {qLuma1}, [lumaDst@128], lumaStep
        VLD1    {qLuma6, qLuma7}, [data]!
        VST1    {qLuma2}, [lumaDst@128], lumaStep
        VLD1    {qLuma8, qLuma9}, [data]!
        VST1    {qLuma3}, [lumaDst@128], lumaStep
        VLD1    {qLuma10, qLuma11}, [data]!
        VST1    {qLuma4}, [lumaDst@128], lumaStep
        VLD1    {qLuma12, qLuma13}, [data]!
        VST1    {qLuma5}, [lumaDst@128], lumaStep
        VLD1    {qLuma14, qLuma15}, [data]!
        VST1    {qLuma6}, [lumaDst@128], lumaStep

        ; Luma rows 7-10, with the chroma loads slotted in between.
        VLD1    {qCb01, qCb23}, [data]!         ; cb rows 0-3 (Q0, Q1 already stored)
        VST1    {qLuma7}, [lumaDst@128], lumaStep
        VLD1    {qCb45, qCb67}, [data]!         ; cb rows 4-7 (Q2, Q3 already stored)
        VST1    {qLuma8}, [lumaDst@128], lumaStep
        VLD1    {qCr01, qCr23}, [data]!         ; cr rows 0-3 (Q4, Q5 already stored)
        VST1    {qLuma9}, [lumaDst@128], lumaStep
        VLD1    {qCr45, qCr67}, [data]!         ; cr rows 4-7 (Q6, Q7 already stored)
        VST1    {qLuma10}, [lumaDst@128], lumaStep

        ; Luma rows 11-15 interleaved with the chroma rows.
        VST1    {dCb0}, [cbDst@64], chromaStep
        VST1    {dCr0}, [crDst@64], chromaStep
        VST1    {qLuma11}, [lumaDst@128], lumaStep
        VST1    {dCb1}, [cbDst@64], chromaStep
        VST1    {dCr1}, [crDst@64], chromaStep
        VST1    {qLuma12}, [lumaDst@128], lumaStep
        VST1    {dCb2}, [cbDst@64], chromaStep
        VST1    {dCr2}, [crDst@64], chromaStep
        VST1    {qLuma13}, [lumaDst@128], lumaStep
        VST1    {dCb3}, [cbDst@64], chromaStep
        VST1    {dCr3}, [crDst@64], chromaStep
        VST1    {qLuma14}, [lumaDst@128], lumaStep
        VST1    {dCb4}, [cbDst@64], chromaStep
        VST1    {dCr4}, [crDst@64], chromaStep
        ; Last luma row: the pointer is dead afterwards, so no post-increment.
        ; NOTE(review): unlike the other luma stores, this one has no @128
        ; qualifier.
        VST1    {qLuma15}, [lumaDst]
        VST1    {dCb5}, [cbDst@64], chromaStep
        VST1    {dCr5}, [crDst@64], chromaStep
        VST1    {dCb6}, [cbDst@64], chromaStep
        VST1    {dCr6}, [crDst@64], chromaStep
        ; Last chroma rows: no post-increment needed.
        VST1    {dCb7}, [cbDst@64]
        VST1    {dCr7}, [crDst@64]

        VPOP    {q4-q7}
        POP     {r4-r6,pc}                      ; restore and return
        END