Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2015 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkTypes.h"
      9 #include <arm_neon.h>
     10 
     11 void sk_memset32_neon(uint32_t dst[], uint32_t value, int count) {
     12     uint32x4_t   v4  = vdupq_n_u32(value);
     13     uint32x4x4_t v16 = { v4, v4, v4, v4 };
     14 
     15     while (count >= 16) {
     16         vst4q_u32(dst, v16);  // This swizzles, but we don't care: all lanes are the same, value.
     17         dst   += 16;
     18         count -= 16;
     19     }
     20     SkASSERT(count < 16);
     21     switch (count / 4) {
     22         case 3: vst1q_u32(dst, v4); dst += 4; count -= 4;
     23         case 2: vst1q_u32(dst, v4); dst += 4; count -= 4;
     24         case 1: vst1q_u32(dst, v4); dst += 4; count -= 4;
     25     }
     26     SkASSERT(count < 4);
     27     if (count >= 2) {
     28         vst1_u32(dst, vget_low_u32(v4));
     29         dst   += 2;
     30         count -= 2;
     31     }
     32     SkASSERT(count < 2);
     33     if (count > 0) {
     34         *dst = value;
     35     }
     36 }
     37 
     38 void sk_memset16_neon(uint16_t dst[], uint16_t value, int count) {
     39     uint16x8_t   v8  = vdupq_n_u16(value);
     40     uint16x8x4_t v32 = { v8, v8, v8, v8 };
     41 
     42     while (count >= 32) {
     43         vst4q_u16(dst, v32);  // This swizzles, but we don't care: all lanes are the same, value.
     44         dst   += 32;
     45         count -= 32;
     46     }
     47     SkASSERT(count < 32);
     48     switch (count / 8) {
     49         case 3: vst1q_u16(dst, v8); dst += 8; count -= 8;
     50         case 2: vst1q_u16(dst, v8); dst += 8; count -= 8;
     51         case 1: vst1q_u16(dst, v8); dst += 8; count -= 8;
     52     }
     53     SkASSERT(count < 8);
     54     if (count >= 4) {
     55         vst1_u16(dst, vget_low_u16(v8));
     56         dst   += 4;
     57         count -= 4;
     58     }
     59     SkASSERT(count < 4);
     60     switch (count) {
     61         case 3: *dst++ = value;
     62         case 2: *dst++ = value;
     63         case 1: *dst   = value;
     64     }
     65 }
     66 
     67