// Copyright 2017, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "examples.h"

// Shorthand so that emitted instructions read like an assembly listing.
#define __ masm->

// Emits, through `masm`, a routine that prints an ASCII rendering of the
// Mandelbrot set: 64 lines of 128 characters, one Printf call per line.
//
// The generated code walks c over the square [-2, 2) x [-2, 2), handling four
// horizontally adjacent points at a time in the lanes of a Q register. For
// each group it applies z(n+1) = zn^2 + c a fixed number of times and records
// which lanes ever had |z|^2 > 4. Lanes that never did are printed as '*', the
// others as ' '.
//
// The iteration count is taken from r0 by the generated code; r0 is re-read at
// the start of every group of four points.
// NOTE(review): r0 is presumably the first integer argument of the generated
// function (main() passes `iterations` there) - confirm against the ABI in
// use.
//
// r4-r6 are pushed on entry and popped before the final `Bx(lr)`. The line
// buffer lives on the stack and is released before returning.
void GenerateMandelBrot(MacroAssembler* masm) {
  // Four consecutive real parts / the (replicated) imaginary part of c.
  const QRegister kCReal = q0;
  const QRegister kCImag = q1;

  // Per-group increment of re(c) and per-line increment of im(c).
  const QRegister kCRealStep = q13;
  const QRegister kCImagStep = q14;

  // Threshold that |z|^2 is compared against.
  const QRegister kModSqLimit = q15;

  // Save register values.
  __ Push(RegisterList(r4, r5, r6));

  // 0.125 is four times the 0.03125 spacing of the lanes set up below, so each
  // step moves on to the next four points; 32 such steps cover a width of 4.
  __ Vmov(F32, kCRealStep, 0.125);
  // 64 lines of 0.0625 cover a height of 4.
  __ Vmov(F32, kCImagStep, 0.0625);

  // Holds the zero byte used to terminate each output line.
  const Register kZero = r2;
  __ Mov(kZero, 0);

  const DRegister kStars = d6;
  const DRegister kSpaces = d7;
  // Output characters - packed 4 characters into 32 bits.
  __ Vmov(I8, kStars, '*');
  __ Vmov(I8, kSpaces, ' ');

  // kNegTwo is lane 1 of d7, i.e. it shares a register with kSpaces. The
  // write to s15 below is what fills that lane (s15 being the upper 32 bits of
  // d7), so afterwards only the low 32 bits of kSpaces still hold spaces. That
  // is sufficient here because only lane 0 of the selected result is stored.
  const DRegisterLane kNegTwo = DRegisterLane(d7, 1);
  __ Vmov(s15, -2.0);

  // Imaginary part of c: start at -2 in every lane.
  __ Vdup(Untyped32, kCImag, kNegTwo);

  // Max modulus squared.
  __ Vmov(F32, kModSqLimit, 4.0);

  // Height of output in characters. r4 is the line counter.
  __ Mov(r4, 64);

  // String length will be 129 (32 groups of 4 characters plus the terminating
  // byte), so need 132 bytes of space.
  const uint32_t kStringLength = 132;

  // Make space for our string.
  __ Sub(sp, sp, kStringLength);

  // Set up a starting pointer for the string.
  const Register kStringPtr = r6;
  __ Mov(kStringPtr, sp);

  // Loop over imaginary values of c from -2 to 2, taking
  // 64 equally spaced values in the range.
  {
    Label c_imag_loop;

    __ Bind(&c_imag_loop);

    // Real part of c.
    // Store 4 equally spaced values in q0 (kCReal) to use SIMD.
    // This is redone for every line, which restarts the row at re(c) = -2.
    __ Vmov(s0, -2.0);
    __ Vmov(s1, -1.96875);
    __ Vmov(s2, -1.9375);
    __ Vmov(s3, -1.90625);

    // Width of output in terms of sets of 4 characters - twice that
    // of height to compensate for ratio of character height to width.
    // r5 is the group counter for the current line.
    __ Mov(r5, 32);

    const Register kWriteCursor = r3;
    // Set a cursor ready to write the next line.
    __ Mov(kWriteCursor, kStringPtr);

    // Loop over real values of c from -2 to 2, processing
    // 4 different values simultaneously using SIMD.
    {
      // Per-lane "exceeded the limit" mask, and the D register that the mask
      // is narrowed into.
      const QRegister kFlags = q2;
      const DRegister kLowerFlags = d4;

      Label c_real_loop;
      __ Bind(&c_real_loop);

      // Get number of iterations. One is added because the loop below
      // decrements and tests the counter before doing any work.
      __ Add(r1, r0, 1);

      // Perform the iterations of z(n+1) = zn^2 + c using SIMD.
      // If the result is that c is in the set, the element of
      // kFlags will be 0, else ~0.
      {
        const QRegister kZReal = q8;
        const QRegister kZImag = q9;

        // Real part of z.
        __ Vmov(F32, kZReal, 0.0);

        // Imaginary part of z.
        __ Vmov(F32, kZImag, 0.0);

        // No lane has exceeded the limit yet.
        __ Vmov(F32, kFlags, 0.0);

        Label iterative_formula_start, iterative_formula_end;
        __ Bind(&iterative_formula_start);
        // Leave once the counter reaches zero. There is no early exit for
        // lanes that have already exceeded the limit; the loop always runs
        // the full count.
        __ Subs(r1, r1, 1);
        __ B(le, &iterative_formula_end);

        // z(n+1) = zn^2 + c.
        // re(z(n+1)) = re(c) + re(zn)^2 - im(zn)^2.
        // im(z(n+1)) = im(c) + 2 * re(zn) * im(zn)

        // The product is taken first, while kZReal and kZImag both still hold
        // zn; q10 is a scratch register.
        __ Vmul(F32, q10, kZReal, kZImag);  // re(zn) * im(zn)

        __ Vmul(F32, kZReal, kZReal, kZReal);  // re(zn)^2
        __ Vadd(F32, kZReal, kCReal, kZReal);  // re(c) + re(zn)^2
        __ Vmls(F32, kZReal, kZImag, kZImag);  // re(c) + re(zn)^2 - im(zn)^2

        // Subtracting the product multiplied by -2 adds twice the product.
        __ Vmov(F32, kZImag, kCImag);        // im(c)
        __ Vmls(F32, kZImag, q10, kNegTwo);  // im(c) + 2 * re(zn) * im(zn)

        // The OR accumulates into kFlags, so a lane stays marked once it has
        // exceeded the limit, whatever later comparisons yield.
        __ Vmul(F32, q10, kZReal, kZReal);    // re(z(n+1))^2
        __ Vmla(F32, q10, kZImag, kZImag);    // re(z(n+1))^2 + im(z(n+1))^2
        __ Vcgt(F32, q10, q10, kModSqLimit);  // |z(n+1)|^2 > 4 ? ~0 : 0
        __ Vorr(F32, kFlags, kFlags, q10);    // (~0/0) | above result

        __ B(&iterative_formula_start);
        __ Bind(&iterative_formula_end);
      }

      // Narrow twice so that each mask is 8 bits, packed into
      // the low 32 bits of kLowerFlags.
      // kLowerFlags is the lower half of kFlags, so the second narrow will
      // be working on the results of the first to halve the size of each
      // representation again.
      __ Vmovn(I32, kLowerFlags, kFlags);
      __ Vmovn(I16, kLowerFlags, kFlags);

      // '*' if in set, ' ' if not.
      // kLowerFlags is both the selection mask and the destination: set mask
      // bits take the corresponding bits of kSpaces, clear bits those of
      // kStars.
      __ Vbsl(Untyped32, kLowerFlags, kSpaces, kStars);

      // Add this to the string: store lane 0 (four characters) and let the
      // post-index addressing advance the cursor.
      __ Vst1(Untyped32,
              NeonRegisterList(kLowerFlags, 0),
              AlignedMemOperand(kWriteCursor, k32BitAlign, PostIndex));

      // Increase real part of c.
      __ Vadd(F32, kCReal, kCReal, kCRealStep);

      __ Subs(r5, r5, 1);
      __ B(ne, &c_real_loop);
    }

    // Put terminating character.
    __ Strb(kZero, MemOperand(kWriteCursor));

    // Print the string.
    __ Printf("%s\n", kStringPtr);

    // Increase imaginary part of c.
    __ Vadd(F32, kCImag, kCImag, kCImagStep);

    __ Subs(r4, r4, 1);
    __ B(ne, &c_imag_loop);
  }
  // Restore stack pointer.
  __ Add(sp, sp, kStringLength);
  // Restore register values.
  __ Pop(RegisterList(r4, r5, r6));
  __ Bx(lr);
}

#ifndef TEST_EXAMPLES
// Stand-alone driver: generates the Mandelbrot routine and, unless
// VIXL_INCLUDE_SIMULATOR_AARCH32 is defined, copies it into executable memory
// and calls it with an iteration count of 1000.
int main() {
  MacroAssembler masm;
  // Generate the code for the example function.
  // The label bound here marks the routine's entry point in the buffer.
  Label mandelbrot;
  masm.Bind(&mandelbrot);
  GenerateMandelBrot(&masm);
  masm.FinalizeCode();
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH32
  // There is no simulator defined for VIXL AArch32.
  printf("This example cannot be simulated\n");
#else
  byte* code = masm.GetBuffer()->GetStartAddress<byte*>();
  uint32_t code_size = masm.GetSizeOfCodeGenerated();
  ExecutableMemory memory(code, code_size);
  // Run the example function.
  // NOTE(review): the pointer type declares a double return value, but the
  // generator above has no visible step that produces one; the result of the
  // call is discarded.
  double (*mandelbrot_func)(uint32_t) =
      memory.GetEntryPoint<double (*)(uint32_t)>(mandelbrot,
                                                 masm.GetInstructionSetInUse());
  uint32_t iterations = 1000;
  (*mandelbrot_func)(iterations);
#endif
  return 0;
}
#endif  // TEST_EXAMPLES