Home | History | Annotate | Download | only in aarch32
      1 // Copyright 2017, VIXL authors
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #include "examples.h"
     28 
     29 #define __ masm->
     30 
     31 void GenerateMandelBrot(MacroAssembler* masm) {
     32   const QRegister kCReal = q0;
     33   const QRegister kCImag = q1;
     34 
     35   const QRegister kCRealStep = q13;
     36   const QRegister kCImagStep = q14;
     37 
     38   const QRegister kModSqLimit = q15;
     39 
     40   // Save register values.
     41   __ Push(RegisterList(r4, r5, r6));
     42 
     43   __ Vmov(F32, kCRealStep, 0.125);
     44   __ Vmov(F32, kCImagStep, 0.0625);
     45 
     46   const Register kZero = r2;
     47   __ Mov(kZero, 0);
     48 
     49   const DRegister kStars = d6;
     50   const DRegister kSpaces = d7;
     51   // Output characters - packed 4 characters into 32 bits.
     52   __ Vmov(I8, kStars, '*');
     53   __ Vmov(I8, kSpaces, ' ');
     54 
     55   const DRegisterLane kNegTwo = DRegisterLane(d7, 1);
     56   __ Vmov(s15, -2.0);
     57 
     58   // Imaginary part of c.
     59   __ Vdup(Untyped32, kCImag, kNegTwo);
     60 
     61   // Max modulus squared.
     62   __ Vmov(F32, kModSqLimit, 4.0);
     63 
     64   // Height of output in characters.
     65   __ Mov(r4, 64);
     66 
     67   // String length will be 129, so need 132 bytes of space.
     68   const uint32_t kStringLength = 132;
     69 
     70   // Make space for our string.
     71   __ Sub(sp, sp, kStringLength);
     72 
     73   // Set up a starting pointer for the string.
     74   const Register kStringPtr = r6;
     75   __ Mov(kStringPtr, sp);
     76 
     77   // Loop over imaginary values of c from -2 to 2, taking
     78   // 64 equally spaced values in the range.
     79   {
     80     Label c_imag_loop;
     81 
     82     __ Bind(&c_imag_loop);
     83 
     84     // Real part of c.
     85     // Store 4 equally spaced values in q0 (kCReal) to use SIMD.
     86     __ Vmov(s0, -2.0);
     87     __ Vmov(s1, -1.96875);
     88     __ Vmov(s2, -1.9375);
     89     __ Vmov(s3, -1.90625);
     90 
     91     // Width of output in terms of sets of 4 characters - twice that
     92     // of height to compensate for ratio of character height to width.
     93     __ Mov(r5, 32);
     94 
     95     const Register kWriteCursor = r3;
     96     // Set a cursor ready to write the next line.
     97     __ Mov(kWriteCursor, kStringPtr);
     98 
     99     // Loop over real values of c from -2 to 2, processing
    100     // 4 different values simultaneously using SIMD.
    101     {
    102       const QRegister kFlags = q2;
    103       const DRegister kLowerFlags = d4;
    104 
    105       Label c_real_loop;
    106       __ Bind(&c_real_loop);
    107 
    108       // Get number of iterations.
    109       __ Add(r1, r0, 1);
    110 
    111       // Perform the iterations of z(n+1) = zn^2 + c using SIMD.
    112       // If the result is that c is in the set, the element of
    113       // kFlags will be 0, else ~0.
    114       {
    115         const QRegister kZReal = q8;
    116         const QRegister kZImag = q9;
    117 
    118         // Real part of z.
    119         __ Vmov(F32, kZReal, 0.0);
    120 
    121         // Imaginary part of z.
    122         __ Vmov(F32, kZImag, 0.0);
    123 
    124         __ Vmov(F32, kFlags, 0.0);
    125 
    126         Label iterative_formula_start, iterative_formula_end;
    127         __ Bind(&iterative_formula_start);
    128         __ Subs(r1, r1, 1);
    129         __ B(le, &iterative_formula_end);
    130 
    131         // z(n+1) = zn^2 + c.
    132         // re(z(n+1)) = re(c) + re(zn)^2 - im(zn)^2.
    133         // im(z(n+1)) = im(c) + 2 * re(zn) * im(zn)
    134 
    135         __ Vmul(F32, q10, kZReal, kZImag);  // re(zn) * im(zn)
    136 
    137         __ Vmul(F32, kZReal, kZReal, kZReal);  // re(zn)^2
    138         __ Vadd(F32, kZReal, kCReal, kZReal);  // re(c) + re(zn)^2
    139         __ Vmls(F32, kZReal, kZImag, kZImag);  // re(c) + re(zn)^2 - im(zn)^2
    140 
    141         __ Vmov(F32, kZImag, kCImag);        // im(c)
    142         __ Vmls(F32, kZImag, q10, kNegTwo);  // im(c) + 2 * re(zn) * im(zn)
    143 
    144         __ Vmul(F32, q10, kZReal, kZReal);    // re(z(n+1))^2
    145         __ Vmla(F32, q10, kZImag, kZImag);    // re(z(n+1))^2 + im(z(n+1))^2
    146         __ Vcgt(F32, q10, q10, kModSqLimit);  // |z(n+1)|^2 > 4 ? ~0 : 0
    147         __ Vorr(F32, kFlags, kFlags, q10);    // (~0/0) | above result
    148 
    149         __ B(&iterative_formula_start);
    150         __ Bind(&iterative_formula_end);
    151       }
    152 
    153       // Narrow twice so that each mask is 8 bits, packed into
    154       // a single 32 bit register s4.
    155       // kLowerFlags is the lower half of kFlags, so the second narrow will
    156       // be working on the results of the first to halve the size of each
    157       // representation again.
    158       __ Vmovn(I32, kLowerFlags, kFlags);
    159       __ Vmovn(I16, kLowerFlags, kFlags);
    160 
    161       // '*' if in set, ' ' if not.
    162       __ Vbsl(Untyped32, kLowerFlags, kSpaces, kStars);
    163 
    164       // Add this to the string.
    165       __ Vst1(Untyped32,
    166               NeonRegisterList(kLowerFlags, 0),
    167               AlignedMemOperand(kWriteCursor, k32BitAlign, PostIndex));
    168 
    169       // Increase real part of c.
    170       __ Vadd(F32, kCReal, kCReal, kCRealStep);
    171 
    172       __ Subs(r5, r5, 1);
    173       __ B(ne, &c_real_loop);
    174     }
    175 
    176     // Put terminating character.
    177     __ Strb(kZero, MemOperand(kWriteCursor));
    178 
    179     // Print the string.
    180     __ Printf("%s\n", kStringPtr);
    181 
    182     // Increase imaginary part of c.
    183     __ Vadd(F32, kCImag, kCImag, kCImagStep);
    184 
    185     __ Subs(r4, r4, 1);
    186     __ B(ne, &c_imag_loop);
    187   }
    188   // Restore stack pointer.
    189   __ Add(sp, sp, kStringLength);
    190   // Restore register values.
    191   __ Pop(RegisterList(r4, r5, r6));
    192   __ Bx(lr);
    193 }
    194 
    195 #ifndef TEST_EXAMPLES
    196 int main() {
    197   MacroAssembler masm;
    198   // Generate the code for the example function.
    199   Label mandelbrot;
    200   masm.Bind(&mandelbrot);
    201   GenerateMandelBrot(&masm);
    202   masm.FinalizeCode();
    203 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH32
    204   // There is no simulator defined for VIXL AArch32.
    205   printf("This example cannot be simulated\n");
    206 #else
    207   byte* code = masm.GetBuffer()->GetStartAddress<byte*>();
    208   uint32_t code_size = masm.GetSizeOfCodeGenerated();
    209   ExecutableMemory memory(code, code_size);
    210   // Run the example function.
    211   double (*mandelbrot_func)(uint32_t) =
    212       memory.GetEntryPoint<double (*)(uint32_t)>(mandelbrot,
    213                                                  masm.GetInstructionSetInUse());
    214   uint32_t iterations = 1000;
    215   (*mandelbrot_func)(iterations);
    216 #endif
    217   return 0;
    218 }
    219 #endif  // TEST_EXAMPLES
    220