1 /* This is an example of a program which does cavium atomic memory operations 2 between two processes which share a page. This test is based on : 3 memcheck/tests/atomic_incs.c */ 4 5 #include <stdlib.h> 6 #include <stdio.h> 7 #include <string.h> 8 #include <assert.h> 9 #include <unistd.h> 10 #include <sys/wait.h> 11 #include "tests/sys_mman.h" 12 13 #define N 19 14 #define NNN 3456987 // Number of repetition. 15 16 /* Expected values */ 17 int p1_expd[N] = { 2156643710, 2156643710, 3456986, 6913974, 18 4288053322, 0, 4294967295, 19 6913974, 21777111, 20 3456986, 2153186724, 21 6913974, 21777111, 22 4294967295, 4288053323, // Test 14 23 4288053322, 4273190185, // Test 16 24 0, 0 }; // Test 18 25 26 long long int p2_expd[N] = { 12633614303292, 12633614303292, 3555751, 6913974, 27 -6913974, 0, -1, 28 6913974, 23901514779351, 29 3456986, 11950752204196, 30 6913974, 23901514779351, 31 -1, -6913973, // Test 15 32 -6913974, -23901514779351, // Test 17 33 0, 0 }; // Test 19 34 35 #define IS_8_ALIGNED(_ptr) (0 == (((unsigned long)(_ptr)) & 7)) 36 37 __attribute__((noinline)) void atomic_saa ( int* p, int n ) 38 { 39 #if (_MIPS_ARCH_OCTEON2) 40 unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; 41 __asm__ __volatile__( 42 "move $t0, %0" "\n\t" 43 "ld $t1, 0($t0)" "\n\t" // p 44 "ld $t2, 8($t0)" "\n\t" // n 45 "saa $t2, ($t1)" "\n\t" 46 : /*out*/ 47 : /*in*/ "r"(&block[0]) 48 : /*trash*/ "memory", "t0", "t1", "t2" 49 ); 50 #endif 51 } 52 53 __attribute__((noinline)) void atomic_saad ( long long int* p, int n ) 54 { 55 #if (_MIPS_ARCH_OCTEON2) 56 unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; 57 __asm__ __volatile__( 58 "move $t0, %0" "\n\t" 59 "ld $t1, 0($t0)" "\n\t" // p 60 "ld $t2, 8($t0)" "\n\t" // n 61 "saad $t2, ($t1)" "\n\t" 62 : /*out*/ 63 : /*in*/ "r"(&block[0]) 64 : /*trash*/ "memory", "t0", "t1", "t2" 65 ); 66 #endif 67 } 68 69 __attribute__((noinline)) void atomic_laa ( int* p, int n ) 70 { 71 #if (_MIPS_ARCH_OCTEON2) 72 unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; 73 __asm__ __volatile__( 74 "move $t0, %0" "\n\t" 75 "ld $t1, 0($t0)" "\n\t" // p 76 "ld $t2, 8($t0)" "\n\t" // n 77 "laa $t3, ($t1), $t2" "\n\t" 78 : /*out*/ 79 : /*in*/ "r"(&block[0]) 80 : /*trash*/ "memory", "t0", "t1", "t2" 81 ); 82 #endif 83 } 84 85 __attribute__((noinline)) void atomic_laad ( long long int* p, int n ) 86 { 87 #if (_MIPS_ARCH_OCTEON2) 88 unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; 89 __asm__ __volatile__( 90 "move $t0, %0" "\n\t" 91 "ld $t1, 0($t0)" "\n\t" // p 92 "ld $t2, 8($t0)" "\n\t" // n 93 "laad $t3, ($t1), $t2" "\n\t" 94 : /*out*/ 95 : /*in*/ "r"(&block[0]) 96 : /*trash*/ "memory", "t0", "t1", "t2", "t3" 97 ); 98 #endif 99 } 100 101 __attribute__((noinline)) void atomic_law ( int* p, int n ) 102 { 103 #if (_MIPS_ARCH_OCTEON2) 104 unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; 105 __asm__ __volatile__( 106 "move $t0, %0" "\n\t" 107 "ld $t1, 0($t0)" "\n\t" // p 108 "ld $t2, 8($t0)" "\n\t" // n 109 "law $t3, ($t1), $t2" "\n\t" 110 : /*out*/ 111 : /*in*/ "r"(&block[0]) 112 : /*trash*/ "memory", "t0", "t1", "t2" 113 ); 114 #endif 115 } 116 117 __attribute__((noinline)) void atomic_lawd ( long long int* p, int n ) 118 { 119 #if (_MIPS_ARCH_OCTEON2) 120 unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; 121 __asm__ __volatile__( 122 "move $t0, %0" "\n\t" 123 "ld $t1, 0($t0)" "\n\t" // p 124 "ld $t2, 8($t0)" "\n\t" // n 125 "lawd $t3, ($t1), $t2" "\n\t" 126 : /*out*/ 127 : /*in*/ "r"(&block[0]) 128 : /*trash*/ "memory", "t0", "t1", "t2", "t3" 129 ); 130 #endif 131 } 132 133 __attribute__((noinline)) void atomic_lai ( int* p ) 134 { 135 #if (_MIPS_ARCH_OCTEON2) 136 unsigned long block[2] = { (unsigned long)p }; 137 __asm__ __volatile__( 138 "move $t0, %0" "\n\t" 139 "ld $t1, 0($t0)" "\n\t" // p 140 "ld $t2, 8($t0)" "\n\t" // n 141 "lai $t2, ($t1)" "\n\t" 142 : /*out*/ 143 : /*in*/ "r"(&block[0]) 144 : /*trash*/ "memory", "t0", "t1", "t2" 145 ); 146 #endif 147 } 148 149 __attribute__((noinline)) void atomic_laid ( long long int* p ) 150 { 151 #if (_MIPS_ARCH_OCTEON2) 152 unsigned long block[2] = { (unsigned long)p }; 153 __asm__ __volatile__( 154 "move $t0, %0" "\n\t" 155 "ld $t1, 0($t0)" "\n\t" // p 156 "ld $t2, 8($t0)" "\n\t" // n 157 "laid $t2, ($t1)" "\n\t" 158 : /*out*/ 159 : /*in*/ "r"(&block[0]) 160 : /*trash*/ "memory", "t0", "t1", "t2" 161 ); 162 #endif 163 } 164 165 __attribute__((noinline)) void atomic_lad ( int* p ) 166 { 167 #if (_MIPS_ARCH_OCTEON2) 168 unsigned long block[2] = { (unsigned long)p }; 169 __asm__ __volatile__( 170 "move $t0, %0" "\n\t" 171 "ld $t1, 0($t0)" "\n\t" // p 172 "ld $t2, 8($t0)" "\n\t" // n 173 "lad $t2, ($t1)" "\n\t" 174 : /*out*/ 175 : /*in*/ "r"(&block[0]) 176 : /*trash*/ "memory", "t0", "t1", "t2" 177 ); 178 #endif 179 } 180 181 __attribute__((noinline)) void atomic_ladd ( long long int* p ) 182 { 183 #if (_MIPS_ARCH_OCTEON2) 184 unsigned long block[2] = { (unsigned long)p }; 185 __asm__ __volatile__( 186 "move $t0, %0" "\n\t" 187 "ld $t1, 0($t0)" "\n\t" // p 188 "ld $t2, 8($t0)" "\n\t" // n 189 "ladd $t2, ($t1)" "\n\t" 190 : /*out*/ 191 : /*in*/ "r"(&block[0]) 192 : /*trash*/ "memory", "t0", "t1", "t2" 193 ); 194 #endif 195 } 196 197 __attribute__((noinline)) void atomic_lac ( int* p ) 198 { 199 #if (_MIPS_ARCH_OCTEON2) 200 unsigned long block[2] = { (unsigned long)p }; 201 __asm__ __volatile__( 202 "move $t0, %0" "\n\t" 203 "ld $t1, 0($t0)" "\n\t" // p 204 "ld $t2, 8($t0)" "\n\t" // n 205 "lac $t2, ($t1)" "\n\t" 206 : /*out*/ 207 : /*in*/ "r"(&block[0]) 208 : /*trash*/ "memory", "t0", "t1", "t2" 209 ); 210 #endif 211 } 212 213 __attribute__((noinline)) void atomic_lacd ( long long int* p ) 214 { 215 #if (_MIPS_ARCH_OCTEON2) 216 unsigned long block[2] = { (unsigned long)p }; 217 __asm__ __volatile__( 218 "move $t0, %0" "\n\t" 219 "ld $t1, 0($t0)" "\n\t" // p 220 "ld $t2, 8($t0)" "\n\t" // n 221 "lacd $t2, ($t1)" "\n\t" 222 : /*out*/ 223 : /*in*/ "r"(&block[0]) 224 : /*trash*/ "memory", "t0", "t1", "t2" 225 ); 226 #endif 227 } 228 229 __attribute__((noinline)) void atomic_las ( int* p ) 230 { 231 #if (_MIPS_ARCH_OCTEON2) 232 unsigned long block[2] = { (unsigned long)p }; 233 __asm__ __volatile__( 234 "move $t0, %0" "\n\t" 235 "ld $t1, 0($t0)" "\n\t" // p 236 "ld $t2, 8($t0)" "\n\t" // n 237 "las $t2, ($t1)" "\n\t" 238 : /*out*/ 239 : /*in*/ "r"(&block[0]) 240 : /*trash*/ "memory", "t0", "t1", "t2" 241 ); 242 #endif 243 } 244 245 __attribute__((noinline)) void atomic_lasd ( long long int* p ) 246 { 247 #if (_MIPS_ARCH_OCTEON2) 248 unsigned long block[2] = { (unsigned long)p }; 249 __asm__ __volatile__( 250 "move $t0, %0" "\n\t" 251 "ld $t1, 0($t0)" "\n\t" // p 252 "ld $t2, 8($t0)" "\n\t" // n 253 "lasd $t2, ($t1)" "\n\t" 254 : /*out*/ 255 : /*in*/ "r"(&block[0]) 256 : /*trash*/ "memory", "t0", "t1", "t2" 257 ); 258 #endif 259 } 260 261 #define TRIOP_AND_SAA(instruction, base1, base2, n) \ 262 { \ 263 __asm__ __volatile__( \ 264 instruction" $t0, (%0), %2" "\n\t" \ 265 "saa $t0, (%1)" "\n\t" \ 266 : /*out*/ \ 267 : /*in*/ "r"(base1), "r"(base2), "r"(n) \ 268 : /*trash*/ "memory", "t0" \ 269 ); \ 270 } 271 272 #define TRIOP_AND_SAAD(instruction, base1, base2, n) \ 273 { \ 274 __asm__ __volatile__( \ 275 instruction" $t0, (%0), %2" "\n\t" \ 276 "saad $t0, (%1)" "\n\t" \ 277 : /*out*/ \ 278 : /*in*/ "r"(base1), "r"(base2), "r"(n) \ 279 : /*trash*/ "memory", "t0" \ 280 ); \ 281 } 282 283 #define BINOP_AND_SAA(instruction, base1, base2) \ 284 { \ 285 __asm__ __volatile__( \ 286 instruction" $t0, (%0)" "\n\t" \ 287 "saa $t0, (%1)" "\n\t" \ 288 : /*out*/ \ 289 : /*in*/ "r"(base1), "r"(base2) \ 290 : /*trash*/ "memory", "t0" \ 291 ); \ 292 } 293 294 #define BINOP_AND_SAAD(instruction, base1, base2) \ 295 { \ 296 __asm__ __volatile__( \ 297 instruction" $t0, (%0)" "\n\t" \ 298 "saad $t0, (%1)" "\n\t" \ 299 : /*out*/ \ 300 : /*in*/ "r"(base1), "r"(base2) \ 301 : /*trash*/ "memory", "t0" \ 302 ); \ 303 } 304 305 int main ( int argc, char** argv ) 306 { 307 #if (_MIPS_ARCH_OCTEON2) 308 int i, status; 309 char* page[N]; 310 int* p1[N]; 311 long long int* p2[N]; 312 pid_t child, pc2; 313 314 for (i = 0; i < N; i++) { 315 page[i] = mmap( 0, sysconf(_SC_PAGESIZE), 316 PROT_READ|PROT_WRITE, 317 MAP_ANONYMOUS|MAP_SHARED, -1, 0 ); 318 if (page[i] == MAP_FAILED) { 319 perror("mmap failed"); 320 exit(1); 321 } 322 p1[i] = (int*)(page[i] + 0); 323 p2[i] = (long long int*)(page[i] + 256); 324 325 assert( IS_8_ALIGNED(p1[i]) ); 326 assert( IS_8_ALIGNED(p2[i]) ); 327 328 memset(page[i], 0, 1024); 329 memset(page[i], 0, 1024); 330 331 *p1[i] = 0; 332 *p2[i] = 0; 333 } 334 335 child = fork(); 336 if (child == -1) { 337 perror("fork() failed\n"); 338 return 1; 339 } 340 341 if (child == 0) { 342 /* --- CHILD --- */ 343 for (i = 0; i < NNN; i++) { 344 atomic_saa(p1[0], i); 345 atomic_saad(p2[0], i + 98765 ); /* ensure we hit the upper 32 bits */ 346 atomic_laa(p1[1], i); 347 atomic_laad(p2[1], i + 98765 ); /* ensure we hit the upper 32 bits */ 348 atomic_law(p1[2], i); 349 atomic_lawd(p2[2], i + 98765 ); /* ensure we hit the upper 32 bits */ 350 atomic_lai(p1[3]); 351 atomic_laid(p2[3]); 352 atomic_lad(p1[4]); 353 atomic_ladd(p2[4]); 354 atomic_lac(p1[5]); 355 atomic_lacd(p2[5]); 356 atomic_las(p1[6]); 357 atomic_lasd(p2[6]); 358 TRIOP_AND_SAA("laa ", p1[7], p1[8], 1) 359 TRIOP_AND_SAAD("laad ", p2[7], p2[8], 1) 360 TRIOP_AND_SAA("law ", p1[9], p1[10], i) 361 TRIOP_AND_SAAD("lawd ", p2[9], p2[10], i) 362 BINOP_AND_SAA("lai ", p1[11], p1[12]) 363 BINOP_AND_SAAD("laid ", p2[11], p2[12]) 364 BINOP_AND_SAA("las ", p1[13], p1[14]) 365 BINOP_AND_SAAD("lasd ", p2[13], p2[14]) 366 BINOP_AND_SAA("lad ", p1[15], p1[16]) 367 BINOP_AND_SAAD("ladd ", p2[15], p2[16]) 368 BINOP_AND_SAA("lac ", p1[17], p1[18]) 369 BINOP_AND_SAAD("lacd ", p2[17], p2[18]) 370 } 371 return 1; 372 /* NOTREACHED */ 373 374 } 375 376 /* --- PARENT --- */ 377 for (i = 0; i < NNN; i++) { 378 atomic_saa(p1[0], i); 379 atomic_saad(p2[0], i + 98765); /* ensure we hit the upper 32 bits */ 380 atomic_laa(p1[1], i); 381 atomic_laad(p2[1], i + 98765); /* ensure we hit the upper 32 bits */ 382 atomic_law(p1[2], i); 383 atomic_lawd(p2[2], i + 98765 ); /* ensure we hit the upper 32 bits */ 384 atomic_lai(p1[3]); 385 atomic_laid(p2[3]); 386 atomic_lad(p1[4]); 387 atomic_ladd(p2[4]); 388 atomic_lac(p1[5]); 389 atomic_lacd(p2[5]); 390 atomic_las(p1[6]); 391 atomic_lasd(p2[6]); 392 TRIOP_AND_SAA("laa ", p1[7], p1[8], 1) 393 TRIOP_AND_SAAD("laad ", p2[7], p2[8], 1) 394 TRIOP_AND_SAA("law ", p1[9], p1[10], i) 395 TRIOP_AND_SAAD("lawd ", p2[9], p2[10], i) 396 BINOP_AND_SAA("lai ", p1[11], p1[12]) 397 BINOP_AND_SAAD("laid ", p2[11], p2[12]) 398 BINOP_AND_SAA("las ", p1[13], p1[14]) 399 BINOP_AND_SAAD("lasd ", p2[13], p2[14]) 400 BINOP_AND_SAA("lad ", p1[15], p1[16]) 401 BINOP_AND_SAAD("ladd ", p2[15], p2[16]) 402 BINOP_AND_SAA("lac ", p1[17], p1[18]) 403 BINOP_AND_SAAD("lacd ", p2[17], p2[18]) 404 } 405 406 pc2 = waitpid(child, &status, 0); 407 assert(pc2 == child); 408 409 /* assert that child finished normally */ 410 assert(WIFEXITED(status)); 411 412 printf("Store Atomic Add: 32 bit %u, 64 bit %lld\n", *p1[0], *p2[0]); 413 printf("Load Atomic Add: 32 bit %u, 64 bit %lld\n", *p1[1], *p2[1]); 414 printf("Load Atomic Swap: 32 bit %u, 64 bit %lld\n", *p1[2], *p2[2]); 415 printf("Load Atomic Increment: 32 bit %u, 64 bit %lld\n", *p1[3], *p2[3]); 416 printf("Load Atomic Decrement: 32 bit %u, 64 bit %lld\n", *p1[4], *p2[4]); 417 printf("Load Atomic Clear: 32 bit %u, 64 bit %lld\n", *p1[5], *p2[5]); 418 printf("Load Atomic Set: 32 bit %u, 64 bit %lld\n", *p1[6], *p2[6]); 419 printf("laa and saa: base1: %u, base2: %u\n", *p1[7], *p1[8]); 420 printf("laad and saad: base1: %lld, base2: %lld\n", *p2[7], *p2[8]); 421 printf("law and saa: base1: %u, base2: %u\n", *p1[9], *p1[10]); 422 printf("lawd and saad: base1: %lld, base2: %lld\n", *p2[9], *p2[10]); 423 printf("lai and saa: base1: %u, base2: %u\n", *p1[11], *p1[12]); 424 printf("laid and saad: base1: %lld, base2: %lld\n", *p2[11], *p2[12]); 425 printf("las and saa: base1: %u, base2: %u\n", *p1[13], *p1[14]); 426 printf("lasd and saad: base1: %lld, base2: %lld\n", *p2[13], *p2[14]); 427 printf("lad and saa: base1: %u, base2: %u\n", *p1[15], *p1[16]); 428 printf("ladd and saad: base1: %lld, base2: %lld\n", *p2[15], *p2[16]); 429 printf("lac and saa: base1: %u, base2: %u\n", *p1[17], *p1[18]); 430 printf("lacd and saad: base1: %lld, base2: %lld\n", *p2[17], *p2[18]); 431 432 for (i = 0; i < N; i++) { 433 if (p1_expd[i] == *p1[i] && p2_expd[i] == *p2[i]) { 434 printf("PASS %d\n", i+1); 435 } else { 436 printf("FAIL %d -- see source code for expected values\n", i+1); 437 } 438 } 439 440 printf("parent exits\n"); 441 #endif 442 return 0; 443 } 444