1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <assert.h> 6 #include <math.h> 7 #include <stdint.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <string.h> 11 #include <sys/time.h> 12 #include <unistd.h> 13 14 #include <ppapi/c/ppb_input_event.h> 15 #include <ppapi/cpp/fullscreen.h> 16 #include <ppapi/cpp/input_event.h> 17 #include <ppapi/cpp/var.h> 18 #include <ppapi/cpp/var_array.h> 19 #include <ppapi/cpp/var_array_buffer.h> 20 #include <ppapi/cpp/var_dictionary.h> 21 22 #include "ppapi_simple/ps.h" 23 #include "ppapi_simple/ps_context_2d.h" 24 #include "ppapi_simple/ps_event.h" 25 #include "ppapi_simple/ps_instance.h" 26 #include "ppapi_simple/ps_interface.h" 27 #include "ppapi_simple/ps_main.h" 28 #include "sdk_util/macros.h" 29 #include "sdk_util/thread_pool.h" 30 31 using namespace sdk_util; // For sdk_util::ThreadPool 32 33 namespace { 34 35 #define INLINE inline __attribute__((always_inline)) 36 37 // BGRA helper macro, for constructing a pixel for a BGRA buffer. 38 #define MakeBGRA(b, g, r, a) \ 39 (((a) << 24) | ((r) << 16) | ((g) << 8) | (b)) 40 41 const int kFramesToBenchmark = 100; 42 const int kCellAlignment = 0x10; 43 44 // 128 bit vector types 45 typedef uint8_t u8x16_t __attribute__ ((vector_size (16))); 46 47 // Helper function to broadcast x across 16 element vector. 48 INLINE u8x16_t broadcast(uint8_t x) { 49 u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x}; 50 return r; 51 } 52 53 // Convert a count value into a live (green) or dead color value. 54 const uint32_t kNeighborColors[] = { 55 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 56 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 57 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 58 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 59 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 60 MakeBGRA(0x00, 0xFF, 0x00, 0xFF), 61 MakeBGRA(0x00, 0xFF, 0x00, 0xFF), 62 MakeBGRA(0x00, 0xFF, 0x00, 0xFF), 63 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 64 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 65 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 66 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 67 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 68 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 69 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 70 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 71 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 72 MakeBGRA(0x00, 0x00, 0x00, 0xFF), 73 }; 74 75 // These represent the new health value of a cell based on its neighboring 76 // values. The health is binary: either alive or dead. 77 const uint8_t kIsAlive[] = { 78 0, 0, 0, 0, 0, 1, 1, 1, 0, 79 0, 0, 0, 0, 0, 0, 0, 0, 0 80 }; 81 82 // Timer helper for benchmarking. Returns seconds elapsed since program start, 83 // as a double. 84 timeval start_tv; 85 int start_tv_retv = gettimeofday(&start_tv, NULL); 86 87 inline double getseconds() { 88 const double usec_to_sec = 0.000001; 89 timeval tv; 90 if ((0 == start_tv_retv) && (0 == gettimeofday(&tv, NULL))) 91 return (tv.tv_sec - start_tv.tv_sec) + tv.tv_usec * usec_to_sec; 92 return 0.0; 93 } 94 } // namespace 95 96 97 class Life { 98 public: 99 Life(); 100 virtual ~Life(); 101 // Runs a tick of the simulations, update 2D output. 102 void Update(); 103 // Handle event from user, or message from JS. 104 void HandleEvent(PSEvent* ps_event); 105 private: 106 void UpdateContext(); 107 void DrawCell(int32_t x, int32_t y); 108 void ProcessTouchEvent(const pp::TouchInputEvent& touches); 109 void PostUpdateMessage(const char* message, double value); 110 void StartBenchmark(); 111 void EndBenchmark(); 112 void Stir(); 113 void wSimulate(int y); 114 static void wSimulateEntry(int y, void* data); 115 void Simulate(); 116 117 bool simd_; 118 bool multithread_; 119 bool benchmarking_; 120 int benchmark_frame_counter_; 121 double bench_start_time_; 122 double bench_end_time_; 123 uint8_t* cell_in_; 124 uint8_t* cell_out_; 125 int32_t cell_stride_; 126 int32_t width_; 127 int32_t height_; 128 PSContext2D_t* ps_context_; 129 ThreadPool* workers_; 130 }; 131 132 Life::Life() : 133 simd_(true), 134 multithread_(true), 135 benchmarking_(false), 136 benchmark_frame_counter_(0), 137 bench_start_time_(0.0), 138 bench_end_time_(0.0), 139 cell_in_(NULL), 140 cell_out_(NULL), 141 cell_stride_(0), 142 width_(0), 143 height_(0) { 144 ps_context_ = PSContext2DAllocate(PP_IMAGEDATAFORMAT_BGRA_PREMUL); 145 // Query system for number of processors via sysconf() 146 int num_threads = sysconf(_SC_NPROCESSORS_ONLN); 147 if (num_threads < 2) 148 num_threads = 2; 149 workers_ = new ThreadPool(num_threads); 150 PSEventSetFilter(PSE_ALL); 151 } 152 153 Life::~Life() { 154 delete workers_; 155 PSContext2DFree(ps_context_); 156 } 157 158 void Life::UpdateContext() { 159 cell_stride_ = (ps_context_->width + kCellAlignment - 1) & 160 ~(kCellAlignment - 1); 161 size_t size = cell_stride_ * ps_context_->height; 162 163 if (ps_context_->width != width_ || ps_context_->height != height_) { 164 free(cell_in_); 165 free(cell_out_); 166 167 // Create a new context 168 void* in_buffer = NULL; 169 void* out_buffer = NULL; 170 // alloc buffers aligned on 16 bytes 171 posix_memalign(&in_buffer, kCellAlignment, size); 172 posix_memalign(&out_buffer, kCellAlignment, size); 173 cell_in_ = (uint8_t*) in_buffer; 174 cell_out_ = (uint8_t*) out_buffer; 175 176 memset(cell_out_, 0, size); 177 for (size_t index = 0; index < size; index++) { 178 cell_in_[index] = rand() & 1; 179 } 180 width_ = ps_context_->width; 181 height_ = ps_context_->height; 182 } 183 } 184 185 void Life::DrawCell(int32_t x, int32_t y) { 186 if (!cell_in_) return; 187 if (x > 0 && x < ps_context_->width - 1 && 188 y > 0 && y < ps_context_->height - 1) { 189 cell_in_[x - 1 + y * cell_stride_] = 1; 190 cell_in_[x + 1 + y * cell_stride_] = 1; 191 cell_in_[x + (y - 1) * cell_stride_] = 1; 192 cell_in_[x + (y + 1) * cell_stride_] = 1; 193 } 194 } 195 196 void Life::ProcessTouchEvent(const pp::TouchInputEvent& touches) { 197 uint32_t count = touches.GetTouchCount(PP_TOUCHLIST_TYPE_TOUCHES); 198 uint32_t i, j; 199 for (i = 0; i < count; i++) { 200 pp::TouchPoint touch = 201 touches.GetTouchByIndex(PP_TOUCHLIST_TYPE_TOUCHES, i); 202 int radius = (int)(touch.radii().x()); 203 int x = (int)(touch.position().x()); 204 int y = (int)(touch.position().y()); 205 // num = 1/100th the area of touch point 206 uint32_t num = (uint32_t)(M_PI * radius * radius / 100.0f); 207 for (j = 0; j < num; j++) { 208 int dx = rand() % (radius * 2) - radius; 209 int dy = rand() % (radius * 2) - radius; 210 // only plot random cells within the touch area 211 if (dx * dx + dy * dy <= radius * radius) 212 DrawCell(x + dx, y + dy); 213 } 214 } 215 } 216 217 void Life::PostUpdateMessage(const char* message_name, double value) { 218 pp::VarDictionary message; 219 message.Set("message", message_name); 220 message.Set("value", value); 221 PSInterfaceMessaging()->PostMessage(PSGetInstanceId(), message.pp_var()); 222 } 223 224 void Life::StartBenchmark() { 225 printf("Running benchmark... (SIMD: %s, multi-threading: %s, size: %dx%d)\n", 226 simd_ ? "enabled" : "disabled", 227 multithread_ ? "enabled" : "disabled", 228 ps_context_->width, 229 ps_context_->height); 230 benchmarking_ = true; 231 bench_start_time_ = getseconds(); 232 benchmark_frame_counter_ = kFramesToBenchmark; 233 } 234 235 void Life::EndBenchmark() { 236 double total_time; 237 bench_end_time_ = getseconds(); 238 benchmarking_ = false; 239 total_time = bench_end_time_ - bench_start_time_; 240 printf("Finished - benchmark took %f seconds\n", total_time); 241 // Send benchmark result to JS. 242 PostUpdateMessage("benchmark_result", total_time); 243 } 244 245 void Life::HandleEvent(PSEvent* ps_event) { 246 // Give the 2D context a chance to process the event. 247 if (0 != PSContext2DHandleEvent(ps_context_, ps_event)) { 248 UpdateContext(); 249 return; 250 } 251 252 switch(ps_event->type) { 253 254 case PSE_INSTANCE_HANDLEINPUT: { 255 pp::InputEvent event(ps_event->as_resource); 256 257 switch(event.GetType()) { 258 case PP_INPUTEVENT_TYPE_MOUSEDOWN: 259 case PP_INPUTEVENT_TYPE_MOUSEMOVE: { 260 pp::MouseInputEvent mouse = pp::MouseInputEvent(event); 261 // If the button is down, draw 262 if (mouse.GetModifiers() & PP_INPUTEVENT_MODIFIER_LEFTBUTTONDOWN) { 263 PP_Point location = mouse.GetPosition(); 264 DrawCell(location.x, location.y); 265 } 266 break; 267 } 268 269 case PP_INPUTEVENT_TYPE_TOUCHSTART: 270 case PP_INPUTEVENT_TYPE_TOUCHMOVE: { 271 pp::TouchInputEvent touches = pp::TouchInputEvent(event); 272 ProcessTouchEvent(touches); 273 break; 274 } 275 276 case PP_INPUTEVENT_TYPE_KEYDOWN: { 277 pp::Fullscreen fullscreen(PSInstance::GetInstance()); 278 bool isFullscreen = fullscreen.IsFullscreen(); 279 fullscreen.SetFullscreen(!isFullscreen); 280 break; 281 } 282 283 default: 284 break; 285 } 286 break; // case PSE_INSTANCE_HANDLEINPUT 287 } 288 289 case PSE_INSTANCE_HANDLEMESSAGE: { 290 // Convert Pepper Simple message to PPAPI C++ vars 291 pp::Var var(ps_event->as_var); 292 if (var.is_dictionary()) { 293 pp::VarDictionary dictionary(var); 294 std::string message = dictionary.Get("message").AsString(); 295 if (message == "run_benchmark" && !benchmarking_) { 296 StartBenchmark(); 297 } else if (message == "set_simd") { 298 simd_ = dictionary.Get("value").AsBool(); 299 } else if (message == "set_threading") { 300 multithread_ = dictionary.Get("value").AsBool(); 301 } 302 } 303 break; // case PSE_INSTANCE_HANDLEMESSAGE 304 } 305 306 default: 307 break; 308 } 309 } 310 311 void Life::Stir() { 312 int32_t width = ps_context_->width; 313 int32_t height = ps_context_->height; 314 int32_t stride = cell_stride_; 315 int32_t i; 316 if (cell_in_ == NULL || cell_out_ == NULL) 317 return; 318 319 for (i = 0; i < width; ++i) { 320 cell_in_[i] = rand() & 1; 321 cell_in_[i + (height - 1) * stride] = rand() & 1; 322 } 323 for (i = 0; i < height; ++i) { 324 cell_in_[i * stride] = rand() & 1; 325 cell_in_[i * stride + (width - 1)] = rand() & 1; 326 } 327 } 328 329 void Life::wSimulate(int y) { 330 // Don't run simulation on top and bottom borders 331 if (y < 1 || y >= ps_context_->height - 1) 332 return; 333 334 // Do neighbor summation; apply rules, output pixel color. Note that a 1 cell 335 // wide perimeter is excluded from the simulation update; only cells from 336 // x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated. 337 uint8_t *src0 = (cell_in_ + (y - 1) * cell_stride_); 338 uint8_t *src1 = src0 + cell_stride_; 339 uint8_t *src2 = src1 + cell_stride_; 340 uint8_t *dst = (cell_out_ + y * cell_stride_) + 1; 341 uint32_t *pixels = static_cast<uint32_t *>(ps_context_->data); 342 uint32_t *pixel_line = // static_cast<uint32_t*> 343 (pixels + y * ps_context_->stride / sizeof(uint32_t)); 344 int32_t x = 1; 345 346 if (simd_) { 347 const u8x16_t kOne = broadcast(1); 348 const u8x16_t kFour = broadcast(4); 349 const u8x16_t kEight = broadcast(8); 350 const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 351 352 // Prime the src 353 u8x16_t src00 = *reinterpret_cast<u8x16_t*>(&src0[0]); 354 u8x16_t src01 = *reinterpret_cast<u8x16_t*>(&src0[16]); 355 u8x16_t src10 = *reinterpret_cast<u8x16_t*>(&src1[0]); 356 u8x16_t src11 = *reinterpret_cast<u8x16_t*>(&src1[16]); 357 u8x16_t src20 = *reinterpret_cast<u8x16_t*>(&src2[0]); 358 u8x16_t src21 = *reinterpret_cast<u8x16_t*>(&src2[16]); 359 360 // This inner loop is SIMD - each loop iteration will process 16 cells. 361 for (; (x + 15) < (ps_context_->width - 1); x += 16) { 362 363 // Construct jittered source temps, using __builtin_shufflevector(..) to 364 // extract a shifted 16 element vector from the 32 element concatenation 365 // of two source vectors. 366 u8x16_t src0j0 = src00; 367 u8x16_t src0j1 = __builtin_shufflevector(src00, src01, 368 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); 369 u8x16_t src0j2 = __builtin_shufflevector(src00, src01, 370 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); 371 u8x16_t src1j0 = src10; 372 u8x16_t src1j1 = __builtin_shufflevector(src10, src11, 373 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); 374 u8x16_t src1j2 = __builtin_shufflevector(src10, src11, 375 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); 376 u8x16_t src2j0 = src20; 377 u8x16_t src2j1 = __builtin_shufflevector(src20, src21, 378 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); 379 u8x16_t src2j2 = __builtin_shufflevector(src20, src21, 380 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); 381 382 // Sum the jittered sources to construct neighbor count. 383 u8x16_t count = src0j0 + src0j1 + src0j2 + 384 src1j0 + + src1j2 + 385 src2j0 + src2j1 + src2j2; 386 // Add the center cell. 387 count = count + count + src1j1; 388 // If count > 4 and < 8, center cell will be alive in the next frame. 389 u8x16_t alive1 = count > kFour; 390 u8x16_t alive2 = count < kEight; 391 // Intersect the two comparisons from above. 392 u8x16_t alive = alive1 & alive2; 393 394 // At this point, alive[x] will be one of two values: 395 // 0x00 for a dead cell 396 // 0xFF for an alive cell. 397 // 398 // Next, convert alive cells to green pixel color. 399 // Use __builtin_shufflevector(..) to construct output pixels from 400 // concantination of alive vector and kZero255 const vector. 401 // Indices 0..15 select the 16 cells from alive vector. 402 // Index 16 is zero constant from kZero255 constant vector. 403 // Index 17 is 255 constant from kZero255 constant vector. 404 // Output pixel color values are in BGRABGRABGRABGRA order. 405 // Since each pixel needs 4 bytes of color information, 16 cells will 406 // need to expand to 4 seperate 16 byte pixel splats. 407 u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255, 408 16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17); 409 u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255, 410 16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17); 411 u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255, 412 16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17); 413 u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255, 414 16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17); 415 416 // Write 16 pixels to output pixel buffer. 417 *reinterpret_cast<u8x16_t*>(pixel_line + 0) = pixel0_3; 418 *reinterpret_cast<u8x16_t*>(pixel_line + 4) = pixel4_7; 419 *reinterpret_cast<u8x16_t*>(pixel_line + 8) = pixel8_11; 420 *reinterpret_cast<u8x16_t*>(pixel_line + 12) = pixel12_15; 421 422 // Convert alive mask to 1 or 0 and store in destination cell array. 423 *reinterpret_cast<u8x16_t*>(dst) = alive & kOne; 424 425 // Increment pointers. 426 pixel_line += 16; 427 dst += 16; 428 src0 += 16; 429 src1 += 16; 430 src2 += 16; 431 432 // Shift source over by 16 cells and read the next 16 cells. 433 src00 = src01; 434 src01 = *reinterpret_cast<u8x16_t*>(&src0[16]); 435 src10 = src11; 436 src11 = *reinterpret_cast<u8x16_t*>(&src1[16]); 437 src20 = src21; 438 src21 = *reinterpret_cast<u8x16_t*>(&src2[16]); 439 } 440 } 441 442 // The SIMD loop above does 16 cells at a time. The loop below is the 443 // regular version which processes one cell at a time. It is used to 444 // finish the remainder of the scanline not handled by the SIMD loop. 445 for (; x < (ps_context_->width - 1); ++x) { 446 // Sum the jittered sources to construct neighbor count. 447 int count = src0[0] + src0[1] + src0[2] + 448 src1[0] + + src1[2] + 449 src2[0] + src2[1] + src2[2]; 450 // Add the center cell. 451 count = count + count + src1[1]; 452 // Use table lookup indexed by count to determine pixel & alive state. 453 uint32_t color = kNeighborColors[count]; 454 *pixel_line++ = color; 455 *dst++ = kIsAlive[count]; 456 ++src0; 457 ++src1; 458 ++src2; 459 } 460 } 461 462 // Static entry point for worker thread. 463 void Life::wSimulateEntry(int slice, void* thiz) { 464 static_cast<Life*>(thiz)->wSimulate(slice); 465 } 466 467 void Life::Simulate() { 468 // Stir up the edges to prevent the simulation from reaching steady state. 469 Stir(); 470 471 if (multithread_) { 472 // If multi-threading enabled, dispatch tasks to pool of worker threads. 473 workers_->Dispatch(ps_context_->height, wSimulateEntry, this); 474 } else { 475 // Else manually simulate each line on this thread. 476 for (int y = 0; y < ps_context_->height; y++) { 477 wSimulateEntry(y, this); 478 } 479 } 480 std::swap(cell_in_, cell_out_); 481 } 482 483 void Life::Update() { 484 485 PSContext2DGetBuffer(ps_context_); 486 if (NULL == ps_context_->data) 487 return; 488 489 // If we somehow have not allocated these pointers yet, skip this frame. 490 if (!cell_in_ || !cell_out_) return; 491 492 // Simulate one (or more if benchmarking) frames 493 do { 494 Simulate(); 495 if (!benchmarking_) 496 break; 497 --benchmark_frame_counter_; 498 } while(benchmark_frame_counter_ > 0); 499 if (benchmarking_) 500 EndBenchmark(); 501 502 PSContext2DSwapBuffer(ps_context_); 503 } 504 505 // Starting point for the module. We do not use main since it would 506 // collide with main in libppapi_cpp. 507 int example_main(int argc, char* argv[]) { 508 Life life; 509 while (true) { 510 PSEvent* ps_event; 511 // Consume all available events 512 while ((ps_event = PSEventTryAcquire()) != NULL) { 513 life.HandleEvent(ps_event); 514 PSEventRelease(ps_event); 515 } 516 // Do simulation, render and present. 517 life.Update(); 518 } 519 return 0; 520 } 521 522 // Register the function to call once the Instance Object is initialized. 523 // see: pappi_simple/ps_main.h 524 PPAPI_SIMPLE_REGISTER_MAIN(example_main); 525