/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.cts.rsblas;

import android.renderscript.*;
import android.util.Log;
import java.util.Random;
import java.lang.Math;

/**
 * Tests for the 8-bit matrix multiplication ({@code BNNM}) entry point of
 * {@link ScriptIntrinsicBLAS}.
 *
 * <p>All matrix data is logically unsigned 8-bit. Because Java's {@code byte} is signed, the
 * helpers in this class always widen with {@code & 0xff} before doing arithmetic on the values.
 */
public class BNNMTest extends RSBaseCompute {

    /** Tag used for every log line emitted by this test. */
    private static final String LOG_TAG = "BNNM";

    /** Upper bound on the number of individual mismatches that get logged. */
    private static final int MAX_LOGGED_MISMATCHES = 50;

    /** Percentage of output values that may differ (by at most one) before the check fails. */
    private static final int TOLERATED_DIFFERENCE_PERCENT = 2;

    static {
        System.loadLibrary("bnnmdata_jni");
    }

    /**
     * Fills the given arrays with the captured real-world test data. Implemented natively in the
     * {@code bnnmdata_jni} library.
     */
    native void getData(byte[] a, byte[] b, byte[] c);

    // In Java, the eight-bit 'byte' type is signed, but the API for the 8-bit
    // matrix multiplication deals with unsigned bytes. This is a convenience
    // function that converts arrays of unsigned ints to their equivalent
    // representations as signed bytes. For example, the bit pattern 0xff is 255
    // as an unsigned value, but -1 as a Java signed byte. So if you pass in an
    // array of int[] {255} into this function, you'll get back byte[] {-1}.
    private byte[] unsignedToSignedByte(int[] input) {
        byte[] output = new byte[input.length];
        for (int i = 0; i < input.length; ++i) {
            // The narrowing cast keeps the low eight bits, i.e. the same bit pattern.
            output[i] = (byte) (input[i]);
        }
        return output;
    }

    /**
     * Randomly perturbs entries of {@code data}, treating every entry as an unsigned 8-bit value.
     *
     * <p>This is a debugging aid for checking that {@link #testWithTolerance} really rejects
     * noisy results; it is not called by the tests themselves.
     *
     * @param data      array to modify in place
     * @param count     number of leading entries to consider (bounded by {@code data.length})
     * @param frequency probability, in [0, 1], that any given entry is perturbed
     * @param maxDelta  largest magnitude by which an entry may be changed
     */
    private void addByteNoise(byte[] data, int count, float frequency, int maxDelta) {
        final Random rand = new Random();
        final int limit = Math.min(count, data.length);
        for (int n = 0; n < limit; ++n) {
            if (rand.nextFloat() < frequency) {
                // Work in the unsigned domain so the clamp below matches the 0..255 range
                // the matrix multiplication actually operates on.
                final int originalValue = data[n] & 0xff;
                final float direction = rand.nextFloat();
                int delta = (int) (Math.ceil(rand.nextFloat() * maxDelta));
                if (direction < 0.5f) {
                    delta = -delta;
                }
                final int newValue = Math.max(0, Math.min(255, originalValue + delta));
                data[n] = (byte) (newValue);
            }
        }
    }

    /**
     * Compares an actual result against the expected one, allowing a small amount of error.
     *
     * <p>The check fails if the arrays have different lengths, if any pair of (unsigned) values
     * differs by more than one, or if the number of differing values reaches
     * {@link #TOLERATED_DIFFERENCE_PERCENT} percent of the output (at least one value).
     *
     * @param c_byte        expected output, as unsigned bytes stored in Java bytes
     * @param c_byte_output actual output, in the same representation
     * @return {@code true} if the actual output is acceptably close to the expected output
     */
    private boolean testWithTolerance(byte[] c_byte, byte[] c_byte_output) {

        // The testing procedure here is a bit complex, but the aim is to mimic the
        // requirements we've empirically found running deep neural networks in real
        // applications. We want to open the door to vendors using approximations that
        // produce slightly different results for optimization's sake, but keep the
        // precision loss within small enough bounds that we don't lose accuracy in
        // the final result.
        // Experimentation suggested that real applications can tolerate around 5% of
        // the output bytes being different by 1, and that any larger differences are
        // not tolerable. This test is stricter than that: it only accepts differences
        // of 1 in fewer than 2% of the output bytes (see TOLERATED_DIFFERENCE_PERCENT).
        // It measures those properties on an example set of parameters that were
        // captured from a real application.
        // For example, if you uncommented this call that adds random noise to the
        // results at a 3% specified frequency, the test should fail:
        // addByteNoise(c_byte_output, c_byte_output.length, 0.03f, 1);

        final boolean areSizesDifferent = (c_byte.length != c_byte_output.length);
        final int c_count = Math.min(c_byte.length, c_byte_output.length);

        int howManyDifferent = 0;
        boolean areAnyTooDifferent = false;
        for (int i = 0; i < c_count; i++) {
            // Widen to unsigned before subtracting; comparing the raw signed bytes would
            // treat 127 vs 128 as wildly different and 255 vs 0 as nearly equal.
            final int expectedValue = c_byte[i] & 0xff;
            final int actualValue = c_byte_output[i] & 0xff;
            final int delta = (expectedValue - actualValue);
            // First make sure that the difference is no more than one.
            if ((delta < -1) || (delta > 1)) {
                areAnyTooDifferent = true;
            }
            // If there is a difference, increment the counter to track it.
            if (delta != 0) {
                // Don't spam the logs if too many are different.
                if (howManyDifferent < MAX_LOGGED_MISMATCHES) {
                    Log.e(LOG_TAG, "Mismatch at " + i +
                            ": expected " + expectedValue +
                            ", got " + actualValue);
                }
                ++howManyDifferent;
            }
        }
        // We want no more than 2% of the values to show any differences, so work out
        // what that means in absolute numbers.
        final int percentThreshold = TOLERATED_DIFFERENCE_PERCENT;
        final int differenceThreshold = Math.max((percentThreshold * c_count) / 100, 1);
        final boolean areTooManyDifferent = (howManyDifferent >= differenceThreshold);

        if (areSizesDifferent) {
            Log.e(LOG_TAG, "Output size mismatch: expected " + c_byte.length +
                    " values, got " + c_byte_output.length);
        }

        if (areAnyTooDifferent) {
            Log.e(LOG_TAG, "Some outputs were too different.");
        }

        if (areTooManyDifferent) {
            Log.e(LOG_TAG, "There were too many small differences." +
                    " We can tolerate " + percentThreshold + "% (" +
                    differenceThreshold + "), but there were " + howManyDifferent);
        }

        return !(areSizesDifferent || areAnyTooDifferent || areTooManyDifferent);
    }

    /**
     * Runs {@code ScriptIntrinsicBLAS.BNNM} on the given inputs and returns the raw output.
     *
     * <p>A is created as a U8 allocation with X = {@code k}, Y = {@code m}; B with X = {@code k},
     * Y = {@code n}; and the output C with X = {@code n}, Y = {@code m}.
     *
     * @param m          Y dimension of A and of C
     * @param n          Y dimension of B and X dimension of C
     * @param k          X dimension shared by A and B
     * @param a_byte     contents copied into A
     * @param a_offset   offset passed to BNNM for A
     * @param b_byte     contents copied into B
     * @param b_offset   offset passed to BNNM for B
     * @param c_offset   offset passed to BNNM for C
     * @param c_mult_int multiplier passed to BNNM for C
     * @return the {@code m * n} output bytes copied out of C
     */
    private byte[] runBNNM(int m, int n, int k, byte[] a_byte, int a_offset, byte[] b_byte,
                           int b_offset, int c_offset, int c_mult_int) {
        Allocation A, B, C;
        Type.Builder builder = new Type.Builder(mRS, Element.U8(mRS));
        Type a_type = builder.setX(k).setY(m).create();
        Type b_type = builder.setX(k).setY(n).create();
        Type c_type = builder.setX(n).setY(m).create();

        A = Allocation.createTyped(mRS, a_type);
        B = Allocation.createTyped(mRS, b_type);
        C = Allocation.createTyped(mRS, c_type);

        A.copyFrom(a_byte);
        B.copyFrom(b_byte);
        // C doesn't matter, is output only

        ScriptIntrinsicBLAS blas = ScriptIntrinsicBLAS.create(mRS);
        blas.BNNM(A, a_offset, B, b_offset, C, c_offset, c_mult_int);

        int c_count = (m * n);
        byte[] c_byte_output = new byte[c_count];
        C.copyTo(c_byte_output);
        return c_byte_output;
    }

    /**
     * Calls BNNM with the given offsets and fails the test unless an
     * {@link RSRuntimeException} is thrown.
     *
     * @param failureMessage message reported if BNNM unexpectedly accepts the offsets
     */
    private void assertBnnmRejectsOffsets(ScriptIntrinsicBLAS blas, Allocation A, int a_offset,
                                          Allocation B, int b_offset, Allocation C,
                                          int c_offset, int c_mult_int, String failureMessage) {
        try {
            blas.BNNM(A, a_offset, B, b_offset, C, c_offset, c_mult_int);
            fail(failureMessage);
        } catch (RSRuntimeException expected) {
            // Expected: the offsets are outside the range BNNM accepts.
        }
    }

    // This test multiplies a couple of small 8-bit matrices, and compares the
    // results with hand-calculated expectations.
    public void testSmallMatrices() {
        // The A matrix is:
        // |   1 |   4 |
        // |   2 |   5 |
        // |   3 |   6 |
        byte[] a_data = unsignedToSignedByte(new int[] {
                1, 2, 3,
                4, 5, 6,
        });
        final int a_rows = 3;
        final int a_cols = 2;
        final int a_offset = 0;
        // The B matrix is:
        // |  -1 |  -2 |  -3 |  -4 |
        // |  -5 |  -6 |  -7 |  -8 |
        // |  -9 | -10 | -11 | -12 |
        byte[] b_data = unsignedToSignedByte(new int[] {
                11, 7, 3,
                10, 6, 2,
                9, 5, 1,
                8, 4, 0,
        });
        final int b_cols = 4;
        final int b_offset = 12;
        // EightBitGemm implements C = B.transposed() * A,
        // so we expect to get these results:
        // 1*-1 + 2*-5 + 3*-9 + 128 = 90
        // 1*-2 + 2*-6 + 3*-10 + 128 = 84
        // 1*-3 + 2*-7 + 3*-11 + 128 = 78
        // 1*-4 + 2*-8 + 3*-12 + 128 = 72
        // 4*-1 + 5*-5 + 6*-9 + 128 = 45
        // 4*-2 + 5*-6 + 6*-10 + 128 = 30
        // 4*-3 + 5*-7 + 6*-11 + 128 = 15
        // 4*-4 + 5*-8 + 6*-12 + 128 = 0
        // | 90 |  45 |
        // | 84 |  30 |
        // | 78 |  15 |
        // | 72 |   0 |
        final int c_offset = 128;
        final int c_shift = 21;
        final int c_mult_int = (1 << c_shift);
        byte[] expected_data = unsignedToSignedByte(new int[] {
                90, 84, 78, 72,
                45, 30, 15, 0,
        });

        final int m = a_cols;
        final int n = b_cols;
        final int k = a_rows;

        byte[] c_byte_output = runBNNM(m, n, k, a_data, a_offset, b_data, b_offset,
                c_offset, c_mult_int);
        assertTrue(testWithTolerance(expected_data, c_byte_output));
    }


    // This test multiplies two medium-sized 8-bit matrices, and compares the
    // results with the expected values. The data itself is fairly arbitrary.
    public void testMediumMatrices1() {
        byte[] a_data = unsignedToSignedByte(new int[] {
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
                0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
                23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
        });
        final int a_rows = 11;
        final int a_cols = 5;
        final int a_offset = 0;
        byte[] b_data = unsignedToSignedByte(new int[] {
                0, 2, 4, 6, 8, 10, 1, 3, 5, 7, 9, 11,
                10, 12, 14, 16, 18, 20, 11, 13, 15, 17, 19, 21,
                20, 22, 24, 26, 28, 30, 21, 23, 25, 27, 29, 31,
                30, 32, 34, 36, 38, 40, 31, 33, 35, 37, 39, 41,
                40, 42, 44, 46, 48, 50, 41, 43, 45, 47, 49, 51,
                50, 52, 54, 56, 58, 60, 51, 53, 55, 57, 59, 61,
                60, 62, 64, 66, 68, 70, 61, 63, 65, 67, 69, 71,
        });
        final int b_cols = 7;
        final int b_offset = 10;
        final int c_offset = 16384;
        final int c_shift = 21;
        final int c_mult_int = (1 << (c_shift - 7));
        byte[] expected_data = unsignedToSignedByte(new int[] {
                126, 131, 135, 140, 146, 151, 155,
                121, 135, 148, 162, 176, 190, 202,
                116, 139, 161, 184, 206, 229, 249,
                128, 128, 129, 129, 129, 130, 130,
                118, 136, 155, 173, 191, 210, 226,
        });

        final int m = a_cols;
        final int n = b_cols;
        final int k = a_rows;

        byte[] c_byte_output = runBNNM(m, n, k, a_data, a_offset, b_data, b_offset,
                c_offset, c_mult_int);
        assertTrue(testWithTolerance(expected_data, c_byte_output));
    }

    // This test multiplies another two medium 8-bit matrices, and compares the
    // results with the expected values. The data here is arbitrary.
    public void testMediumMatrices2() {
        byte[] a_data = unsignedToSignedByte(new int[] {
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                1, 23, 2, 22, 3, 21, 4, 20, 5, 19, 6, 18, 7, 17, 8, 16, 9, 15, 10, 14, 11, 13, 12,
                23, 1, 22, 2, 21, 3, 20, 4, 19, 5, 18, 6, 17, 7, 16, 8, 15, 9, 14, 10, 13, 11, 12,
                1, 1, 1, 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
                3, 1, 4, 1, 5, 8, 2, 3, 1, 14, 11, 15, 18, 12, 13, 11, 14, 11, 15, 18, 12, 13, 11,
                8, 0, 5, 8, 1, 3, 7, 5, 7, 13, 10, 23, 13, 11, 17, 23, 12, 19, 17, 13, 14, 10, 19,
        });
        final int a_rows = 23;
        final int a_cols = 7;
        final int a_offset = 13;
        byte[] b_data = unsignedToSignedByte(new int[] {
                0, 2, 4, 6, 8, 10, 1, 3, 5, 7, 9, 11, 0, 2, 4, 6, 8, 10, 1, 3, 5, 7, 9,
                0, 20, 40, 60, 80, 10, 11, 13, 15, 17, 19, 21, 10, 12, 14, 6, 8, 10, 1, 3, 5, 7, 9,
                1, 21, 41, 61, 81, 11, 12, 14, 16, 18, 20, 22, 11, 13, 15, 7, 9, 11, 2, 4, 6, 8, 9,
                0, 19, 39, 59, 79, 9, 10, 12, 14, 16, 18, 20, 9, 11, 13, 5, 7, 9, 0, 2, 4, 6, 8,
                2, 22, 42, 62, 82, 12, 13, 15, 17, 19, 21, 23, 12, 14, 16, 8, 9, 12, 3, 5, 7, 9, 9,
                0, 18, 38, 58, 78, 8, 9, 11, 13, 15, 17, 19, 8, 10, 12, 4, 6, 8, 0, 1, 3, 5, 7,
                3, 23, 43, 63, 83, 13, 14, 16, 18, 20, 22, 24, 13, 15, 17, 9, 9, 13, 4, 6, 8, 9, 9,
                0, 17, 37, 57, 77, 7, 8, 10, 12, 14, 16, 18, 7, 9, 11, 3, 5, 7, 0, 0, 2, 4, 6,
                10, 20, 30, 40, 50, 1, 2, 3, 4, 5, 11, 12, 13, 14, 15, 21, 22, 23, 24, 25, 1, 2, 3,
        });
        final int b_cols = 9;
        final int b_offset = 23;
        final int c_offset = 2121;
        final int c_mult_int = 132359;
        byte[] expected_data = unsignedToSignedByte(new int[] {
                167, 53, 51, 54, 49, 55, 46,
                56, 116, 153, 232, 232, 234, 231,
                236, 232, 237, 174, 168, 131, 130,
                132, 129, 133, 128, 133, 134, 151,
                154, 152, 156, 151, 158, 150, 160,
                156, 255, 113, 106, 120, 98, 127,
                91, 134, 178, 231, 102, 97, 107,
                92, 111, 87, 116, 164, 187, 76,
                73, 78, 70, 81, 67, 83, 139,
        });

        final int m = a_cols;
        final int n = b_cols;
        final int k = a_rows;

        byte[] c_byte_output = runBNNM(m, n, k, a_data, a_offset, b_data, b_offset,
                c_offset, c_mult_int);
        assertTrue(testWithTolerance(expected_data, c_byte_output));
    }


    // This test takes a large set of real data captured from a convolutional
    // neural network solving a computer vision problem, and runs it through the
    // eight-bit matrix multiply. We test the results to make sure they're close
    // enough to be usable.
    public void testRealData() {

        int m = 256;
        int n = 192;
        int k = 1152;
        int a_offset = 0;
        int b_offset = 84;
        int c_mult_int = 3401;
        int c_offset = 74980;

        int a_count = (m * k);
        int b_count = (n * k);
        int c_count = (m * n);

        byte[] a_byte = new byte[a_count];
        byte[] b_byte = new byte[b_count];
        byte[] c_byte = new byte[c_count];

        getData(a_byte, b_byte, c_byte);

        byte[] c_byte_output = runBNNM(m, n, k, a_byte, a_offset, b_byte, b_offset,
                c_offset, c_mult_int);

        assertTrue(testWithTolerance(c_byte, c_byte_output));

    }

    // This test multiplies matrices where the results are expected to fall
    // slightly outside the 0 to 255 valid output range. This test ensures the
    // values get clamped to that range, rather than wrapping around.
    public void testClamping() {
        // The A matrix is:
        // |   1 |   4 |
        // |   2 |   5 |
        // |   3 |   6 |
        byte[] a_data = unsignedToSignedByte(new int[] {
                1, 2, 3,
                4, 5, 6,
        });
        final int a_rows = 3;
        final int a_cols = 2;
        final int a_offset = 0;
        // The B matrix is:
        // |  -1 |  -2 |  -3 |  -4 |
        // |  -5 |  -6 |  -7 |  -8 |
        // |  99 | -40 | -11 | -15 |
        byte[] b_data = unsignedToSignedByte(new int[] {
                126, 122, 226,
                125, 121, 87,
                124, 120, 116,
                123, 119, 112,
        });
        final int b_cols = 4;
        final int b_offset = 127;
        // EightBitGemm implements C = B.transposed() * A,
        // so we expect to get these results:
        // 1*-1 + 2*-5 + 3* 99 + 128 = 414 (clamped to 255)
        // 1*-2 + 2*-6 + 3*-40 + 128 = -6 (clamped to 0)
        // 1*-3 + 2*-7 + 3*-11 + 128 = 78
        // 1*-4 + 2*-8 + 3*-15 + 128 = 63
        // 4*-1 + 5*-5 + 6* 99 + 128 = 693 (clamped to 255)
        // 4*-2 + 5*-6 + 6*-40 + 128 = -150 (clamped to 0)
        // 4*-3 + 5*-7 + 6*-11 + 128 = 15
        // 4*-4 + 5*-8 + 6*-15 + 128 = -18 (clamped to 0)
        // | 255 | 255 |
        // |   0 |   0 |
        // |  78 |  15 |
        // |  63 |   0 |
        final int c_offset = 128;
        final int c_shift = 21;
        final int c_mult_int = (1 << c_shift);
        byte[] expected_data = unsignedToSignedByte(new int[] {
                255, 0, 78, 63,
                255, 0, 15, 0,
        });

        final int m = a_cols;
        final int n = b_cols;
        final int k = a_rows;

        byte[] c_byte_output = runBNNM(m, n, k, a_data, a_offset, b_data, b_offset,
                c_offset, c_mult_int);
        assertTrue(testWithTolerance(expected_data, c_byte_output));
    }

    // This tests the exception handling for a_offset and b_offset.
    public void testExceptionHandling() {
        // The A matrix is:
        // |   1 |   4 |
        // |   2 |   5 |
        // |   3 |   6 |
        byte[] a_data = unsignedToSignedByte(new int[] {
                1, 2, 3,
                4, 5, 6,
        });
        final int a_rows = 3;
        final int a_cols = 2;
        // The B matrix is:
        // |  -1 |  -2 |  -3 |  -4 |
        // |  -5 |  -6 |  -7 |  -8 |
        // |  -9 | -10 | -11 | -12 |
        byte[] b_data = unsignedToSignedByte(new int[] {
                11, 7, 3,
                10, 6, 2,
                9, 5, 1,
                8, 4, 0,
        });
        final int b_cols = 4;
        // Only the accept/reject behaviour for the offsets is checked here; the numeric
        // results for this data are verified by testSmallMatrices().
        final int c_offset = 128;
        final int c_mult_int = (1 << 21);

        final int m = a_cols;
        final int n = b_cols;
        final int k = a_rows;

        Allocation A, B, C;
        Type.Builder builder = new Type.Builder(mRS, Element.U8(mRS));
        Type a_type = builder.setX(k).setY(m).create();
        Type b_type = builder.setX(k).setY(n).create();
        Type c_type = builder.setX(n).setY(m).create();

        A = Allocation.createTyped(mRS, a_type);
        B = Allocation.createTyped(mRS, b_type);
        C = Allocation.createTyped(mRS, c_type);

        A.copyFrom(a_data);
        B.copyFrom(b_data);
        // C doesn't matter, is output only

        ScriptIntrinsicBLAS blas = ScriptIntrinsicBLAS.create(mRS);

        // Valid offsets must be accepted.
        try {
            int a_offset = 0;
            int b_offset = 12;
            blas.BNNM(A, a_offset, B, b_offset, C, c_offset, c_mult_int);
        } catch (RSRuntimeException e) {
            fail("should NOT throw RSRuntimeException for valid offsets: " + e);
        }

        // Each out-of-range offset must be rejected.
        assertBnnmRejectsOffsets(blas, A, -23, B, 12, C, c_offset, c_mult_int,
                "should throw RSRuntimeException for invalid offsets: a_offset < 0");
        assertBnnmRejectsOffsets(blas, A, 888, B, 12, C, c_offset, c_mult_int,
                "should throw RSRuntimeException for invalid offsets: a_offset > 255");
        assertBnnmRejectsOffsets(blas, A, 0, B, -1, C, c_offset, c_mult_int,
                "should throw RSRuntimeException for invalid offsets: b_offset < 0");
        assertBnnmRejectsOffsets(blas, A, 0, B, 256, C, c_offset, c_mult_int,
                "should throw RSRuntimeException for invalid offsets: b_offset > 255");
    }
}