Home | History | Annotate | Download | only in mips64
      1 #include <stdio.h>
      2 
      3 #define N 256
      4 
      5 unsigned long long reg_val_double[N];
      6 
      7 void init_reg_val_double()
      8 {
      9    unsigned long c = 19650218UL;
     10    int i;
     11    reg_val_double[0]= c & 0xffffffffUL;
     12    for (i = 1; i < N; i++) {
     13       reg_val_double[i] = (1812433253UL * (reg_val_double[i - 1] ^
     14                           (reg_val_double[i - 1] >> 30)) + i);
     15    }
     16 }
     17 
     18 
     19 /* Make a copy of original array to prevent the unexpected changes by Atomic Add
     20    Instructions */
     21 unsigned long long reg_val_double_copy[N];
     22 
     23 void copy_reg_val_double()
     24 {
     25    int i;
     26    for (i = 0; i < N; i++) {
     27       reg_val_double_copy[i] = reg_val_double[i];
     28    }
     29 }
     30 
     31 /* TEST1_32/64 macro is used in load atomic increment/decrement/set/clear
     32    instructions. After executing each instruction we must check both memory
     33    location and register value.
     34 
     35    1: Move arguments (offset and base address) to registers
     36    2: Add offset and base address to make absolute address
     37    3: Execute instruction
     38    4: Move result from register ($t3)
     39    5: Load memory data ('lw' for 32bit instruction and 'ld' for 64bit addresses)
     40 */
     41 #define TEST1_32(instruction, offset,mem)                    \
     42 {                                                            \
     43    unsigned long out = 0;                                    \
     44    unsigned long res_mem = 0;                                \
     45    __asm__ volatile(                                         \
     46      "move         $t0, %2"        "\n\t"                    \
     47      "move         $t1, %3"        "\n\t"                    \
     48      "daddu        $t0, $t1, $t0"  "\n\t"                    \
     49      instruction " $t3, ($t0)"     "\n\t"                    \
     50      "move         %0,  $t3"       "\n\t"                    \
     51      "lw           %1,  0($t0)"    "\n\t"                    \
     52      : "=&r" (out), "=&r"(res_mem)                           \
     53      : "r" (mem) , "r" (offset)                              \
     54      : "$12", "$13", "cc", "memory"                          \
     55      );                                                      \
     56    printf("%s :: offset: 0x%x, out: 0x%lx, result:0x%lx\n",  \
     57           instruction, offset, out, res_mem);                \
     58 }
     59 
     60 #define TEST1_64(instruction, offset,mem)                     \
     61 {                                                             \
     62    unsigned long out = 0;                                     \
     63    unsigned long res_mem = 0;                                 \
     64    __asm__ volatile(                                          \
     65      "move         $t0, %2"        "\n\t"                     \
     66      "move         $t1, %3"        "\n\t"                     \
     67      "daddu        $t0, $t1, $t0"  "\n\t"                     \
     68      instruction " $t3, ($t0)"     "\n\t"                     \
     69      "move         %0,  $t3"       "\n\t"                     \
     70      "ld           %1,  0($t0)"    "\n\t"                     \
     71      : "=&r" (out), "=&r"(res_mem)                            \
     72      : "r" (mem) , "r" (offset)                               \
     73      : "$12", "$13", "cc", "memory"                           \
     74      );                                                       \
     75    printf("%s :: offset: 0x%x, out: 0x%lx, result: 0x%lx\n",  \
     76           instruction, offset, out, res_mem);                 \
     77 }
     78 
     79 /* Test 2 macro is used for pop/dpop/baddu instructions. After executing each
     80    instructions the macro performs following operations:
     81 
     82    1: Move arguments to registers
     83    2: Execute instruction
     84    3: Move result to register ($t3)
     85 */
     86 #define TEST2(instruction, RSVal, RTVal)                            \
     87 {                                                                   \
     88    unsigned long out;                                               \
     89    __asm__ volatile(                                                \
     90       "move $t1, %1"  "\n\t"                                        \
     91       "move $t2, %2"  "\n\t"                                        \
     92       instruction     "\n\t"                                        \
     93       "move %0, $t3"  "\n\t"                                        \
     94       : "=&r" (out)                                                 \
     95       : "r" (RSVal), "r" (RTVal)                                    \
     96       : "$12", "$13", "cc", "memory"                                \
     97         );                                                          \
     98    printf("%s :: rd 0x%lx, rs 0x%llx, rt 0x%llx\n",                 \
     99           instruction, out, (long long) RSVal, (long long) RTVal);  \
    100 }
    101 
    102 /* TEST3 macro is used for store atomic add and store atomic add doubleword
    103    instructions. Following operations are performed by the test macro:
    104 
    105    1: Move arguments to the register
    106    2: Add offset and base address to make absolute address
    107    3: Execute instruction
    108    4: Load memory data
    109 */
    110 #define TEST3(instruction, offset, mem, value)                   \
    111 {                                                                \
    112     unsigned long out = 0;                                       \
    113     unsigned long outPre = 0;                                    \
    114    __asm__ volatile(                                             \
    115      "move         $t0, %2"        "\n\t"                        \
    116      "move         $t1, %3"        "\n\t"                        \
    117      "daddu        $t0, $t1, $t0"  "\n\t"                        \
    118      "ld           %1,  0($t0)"    "\n\t"                        \
    119      "move         $t2, %4"        "\n\t"                        \
    120      instruction " $t2, ($t0)"     "\n\t"                        \
    121      "ld           %0,  0($t0)"    "\n\t"                        \
    122      : "=&r" (out), "=&r" (outPre)                               \
    123      : "r" (mem) , "r" (offset), "r" (value)                     \
    124      : "$12", "$13", "$14", "cc", "memory"                       \
    125      );                                                          \
    126      printf("%s :: value: 0x%llx, memPre: 0x%lx, mem: 0x%lx\n",  \
    127             instruction, value, outPre, out);                    \
    128 }
    129 
    130 /* TEST4_32/64 is used for load atomic add/swap instructions. Following
    131    operations are performed by macro after execution of each instruction:
    132 
    133    1: Move arguments to register.
    134    2: Add offset and base address to make absolute address.
    135    3: Execute instruction.
    136    4: Move result to register.
    137    5: Load memory data ('lw' for 32bit instruction and 'ld' for 64bit).
    138 */
    139 #define TEST4_32(instruction, offset, mem)                   \
    140 {                                                            \
    141     unsigned long out = 0;                                   \
    142     unsigned long res_mem = 0;                               \
    143    __asm__ volatile(                                         \
    144       "move         $t0, %2"          "\n\t"                 \
    145       "move         $t1, %3"          "\n\t"                 \
    146       "daddu        $t0, $t0, $t1"    "\n\t"                 \
    147       instruction " $t3, ($t0), $t1"  "\n\t"                 \
    148       "move         %0,  $t3"         "\n\t"                 \
    149       "lw           %1,  0($t0)"      "\n\t"                 \
    150       : "=&r" (out), "=&r"(res_mem)                          \
    151       : "r" (mem) , "r" (offset)                             \
    152       : "$12", "$13", "cc", "memory"                         \
    153      );                                                      \
    154    printf("%s :: offset: 0x%x, out: 0x%lx, result:0x%lx\n",  \
    155           instruction, offset, out, res_mem);                \
    156 }
    157 
    158 #define TEST4_64(instruction, offset, mem)                    \
    159 {                                                             \
    160     unsigned long out = 0;                                    \
    161     unsigned long res_mem = 0;                                \
    162    __asm__ volatile(                                          \
    163       "move         $t0, %2"          "\n\t"                  \
    164       "move         $t1, %3"          "\n\t"                  \
    165       "daddu        $t0, $t0,   $t1"  "\n\t"                  \
    166       instruction " $t3, ($t0), $t1"  "\n\t"                  \
    167       "move         %0,  $t3"         "\n\t"                  \
    168       "ld           %1,  0($t0)"      "\n\t"                  \
    169      : "=&r" (out), "=&r"(res_mem)                            \
    170      : "r" (mem) , "r" (offset)                               \
    171      : "$12", "$13", "cc", "memory"                           \
    172      );                                                       \
    173    printf("%s :: offset: 0x%x, out: 0x%lx, result: 0x%lx\n",  \
    174           instruction, offset, out, res_mem);                 \
    175 }
    176 
    177 typedef enum {
    178    BADDU, POP, DPOP, SAA, SAAD, LAA, LAAD, LAW, LAWD, LAI, LAID, LAD, LADD,
    179    LAS, LASD, LAC, LACD
    180 } cvm_op;
    181 
    182 int main()
    183 {
    184 #if (_MIPS_ARCH_OCTEON2)
    185    init_reg_val_double();
    186    int i,j;
    187    cvm_op op;
    188    for (op = BADDU; op <= LACD; op++) {
    189       switch(op){
    190          /* Unsigned Byte Add - BADDU rd, rs, rt; Cavium OCTEON */
    191          case BADDU: {
    192             for(i = 4; i < N; i += 4)
    193                for(j = 4; j < N; j += 4)
    194                   TEST2("baddu $t3, $t1, $t2", reg_val_double[i],
    195                                                reg_val_double[j]);
    196             break;
    197          }
    198          case POP: {  /* Count Ones in a Word - POP */
    199             for(j = 4; j < N; j += 4)
    200                TEST2("pop $t3, $t1", reg_val_double[j], 0);
    201             break;
    202          }
    203          case DPOP: {  /* Count Ones in a Doubleword - DPOP */
    204             for(j = 8; j < N; j += 8)
    205                TEST2("dpop $t3, $t1", reg_val_double[j], 0);
    206             break;
    207          }
    208          case SAA: {  /* Atomic Add Word - saa rt, (base). */
    209             copy_reg_val_double();
    210             for(j = 4; j < N; j += 4)
    211                TEST3("saa", j, reg_val_double_copy, reg_val_double[j]);
    212             break;
    213          }
    214          case SAAD: {  /* Atomic Add Double - saad rt, (base). */
    215             copy_reg_val_double();
    216             for(j = 8; j < N; j += 8)
    217                TEST3("saad", j, reg_val_double_copy, reg_val_double[j]);
    218             break;
    219          }
    220          case LAA: {  /* Load Atomic Add Word - laa rd, (base), rt. */
    221             copy_reg_val_double();
    222             for(j = 4; j < N; j += 4)
    223                TEST4_32("laa", j, reg_val_double_copy);
    224             break;
    225          }
    226          case LAAD: {  /* Load Atomic Add Double - laad rd, (base), rt */
    227             copy_reg_val_double();
    228             for(j = 8; j < N; j += 8)
    229                TEST4_64("laad ", j, reg_val_double_copy);
    230             break;
    231          }
    232          case LAW: {  /* Load Atomic Swap Word - law rd, (base), rt */
    233             copy_reg_val_double();
    234             for(j = 4; j < N; j += 4)
    235                TEST4_32("law", j, reg_val_double_copy);
    236             break;
    237          }
    238          case LAWD: {  /* Load Atomic Swap Double - lawd rd, (base), rt */
    239             copy_reg_val_double();
    240             for(j = 8; j < N; j += 8)
    241                TEST4_64("lawd", j, reg_val_double_copy);
    242             break;
    243          }
    244          case LAI: {  /* Load Atomic Increment Word - lai rd, (base) */
    245             copy_reg_val_double();
    246             for(i = 4; i < N; i += 4)
    247                TEST1_32("lai", i, reg_val_double_copy);
    248             break;
    249          }
    250          case LAID: {  /* Load Atomic Increment Double - laid rd, (base) */
    251             copy_reg_val_double();
    252             for(i = 8; i < N; i += 8)
    253               TEST1_64("laid ", i, reg_val_double_copy);
    254             break;
    255          }
    256          case LAD: {  /* Load Atomic Decrement Word - lad rd, (base) */
    257             copy_reg_val_double();
    258             for(i = 4; i < N; i += 4)
    259                TEST1_32("lad", i, reg_val_double_copy);
    260             break;
    261          }
    262          case LADD: {  /* Load Atomic Decrement Double - ladd rd, (base) */
    263             copy_reg_val_double();
    264             for(i = 8; i < N; i += 8)
    265                TEST1_64("ladd",i, reg_val_double_copy);
    266             break;
    267          }
    268          case LAS:{   /* Load Atomic Set Word - las rd, (base) */
    269             copy_reg_val_double();
    270             for(i = 4; i < N; i += 4)
    271                TEST1_32("las",i, reg_val_double_copy);
    272             break;
    273          }
    274          case LASD:{  /* Load Atomic Set Word - lasd rd, (base) */
    275             copy_reg_val_double();
    276             for(i = 8; i < N; i += 8)
    277                TEST1_64("lasd",i, reg_val_double_copy);
    278             break;
    279          }
    280          case LAC: {  /* Load Atomic Clear Word - lac rd, (base) */
    281             copy_reg_val_double();
    282             for(i = 4; i < N; i += 4)
    283                TEST1_32("lac",i, reg_val_double_copy);
    284             break;
    285          }
    286          case LACD: {  /* Load Atomic Clear Double - lacd rd, (base) */
    287             copy_reg_val_double();
    288             for(i = 8; i < N; i += 8)
    289                TEST1_64("lacd",i, reg_val_double_copy);
    290             break;
    291          }
    292          default:
    293             printf("Nothing to be executed \n");
    294       }
    295    }
    296 #endif
    297    return 0;
    298 }
    299