1 /* 2 * Diff Match and Patch 3 * 4 * Copyright 2006 Google Inc. 5 * http://code.google.com/p/google-diff-match-patch/ 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 20 package name.fraser.neil.plaintext; 21 22 import java.io.UnsupportedEncodingException; 23 import java.net.URLEncoder; 24 import java.net.URLDecoder; 25 import java.util.ArrayList; 26 import java.util.Arrays; 27 import java.util.HashMap; 28 import java.util.HashSet; 29 import java.util.LinkedList; 30 import java.util.List; 31 import java.util.ListIterator; 32 import java.util.Map; 33 import java.util.Set; 34 import java.util.Stack; 35 import java.util.regex.Matcher; 36 import java.util.regex.Pattern; 37 38 39 /* 40 * Functions for diff, match and patch. 41 * Computes the difference between two texts to create a patch. 42 * Applies the patch onto another text, allowing for errors. 43 * 44 * @author fraser (at) google.com (Neil Fraser) 45 */ 46 47 /** 48 * Class containing the diff, match and patch methods. 49 * Also contains the behaviour settings. 50 */ 51 public class diff_match_patch { 52 53 // Defaults. 54 // Set these on your diff_match_patch instance to override the defaults. 55 56 /** 57 * Number of seconds to map a diff before giving up (0 for infinity). 58 */ 59 public float Diff_Timeout = 1.0f; 60 /** 61 * Cost of an empty edit operation in terms of edit characters. 62 */ 63 public short Diff_EditCost = 4; 64 /** 65 * The size beyond which the double-ended diff activates. 66 * Double-ending is twice as fast, but less accurate. 67 */ 68 public short Diff_DualThreshold = 32; 69 /** 70 * At what point is no match declared (0.0 = perfection, 1.0 = very loose). 71 */ 72 public float Match_Threshold = 0.5f; 73 /** 74 * How far to search for a match (0 = exact location, 1000+ = broad match). 75 * A match this many characters away from the expected location will add 76 * 1.0 to the score (0.0 is a perfect match). 77 */ 78 public int Match_Distance = 1000; 79 /** 80 * When deleting a large block of text (over ~64 characters), how close does 81 * the contents have to match the expected contents. (0.0 = perfection, 82 * 1.0 = very loose). Note that Match_Threshold controls how closely the 83 * end points of a delete need to match. 84 */ 85 public float Patch_DeleteThreshold = 0.5f; 86 /** 87 * Chunk size for context length. 88 */ 89 public short Patch_Margin = 4; 90 91 /** 92 * The number of bits in an int. 93 */ 94 private int Match_MaxBits = 32; 95 96 /** 97 * Internal class for returning results from diff_linesToChars(). 98 * Other less paranoid languages just use a three-element array. 99 */ 100 protected static class LinesToCharsResult { 101 protected String chars1; 102 protected String chars2; 103 protected List<String> lineArray; 104 105 protected LinesToCharsResult(String chars1, String chars2, 106 List<String> lineArray) { 107 this.chars1 = chars1; 108 this.chars2 = chars2; 109 this.lineArray = lineArray; 110 } 111 } 112 113 114 // DIFF FUNCTIONS 115 116 117 /** 118 * The data structure representing a diff is a Linked list of Diff objects: 119 * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), 120 * Diff(Operation.EQUAL, " world.")} 121 * which means: delete "Hello", add "Goodbye" and keep " world." 122 */ 123 public enum Operation { 124 DELETE, INSERT, EQUAL 125 } 126 127 128 /** 129 * Find the differences between two texts. 130 * Run a faster slightly less optimal diff 131 * This method allows the 'checklines' of diff_main() to be optional. 132 * Most of the time checklines is wanted, so default to true. 133 * @param text1 Old string to be diffed. 134 * @param text2 New string to be diffed. 135 * @return Linked List of Diff objects. 136 */ 137 public LinkedList<Diff> diff_main(String text1, String text2) { 138 return diff_main(text1, text2, true); 139 } 140 141 /** 142 * Find the differences between two texts. Simplifies the problem by 143 * stripping any common prefix or suffix off the texts before diffing. 144 * @param text1 Old string to be diffed. 145 * @param text2 New string to be diffed. 146 * @param checklines Speedup flag. If false, then don't run a 147 * line-level diff first to identify the changed areas. 148 * If true, then run a faster slightly less optimal diff 149 * @return Linked List of Diff objects. 150 */ 151 public LinkedList<Diff> diff_main(String text1, String text2, 152 boolean checklines) { 153 // Check for null inputs. 154 if (text1 == null || text2 == null) { 155 throw new IllegalArgumentException("Null inputs. (diff_main)"); 156 } 157 158 // Check for equality (speedup). 159 LinkedList<Diff> diffs; 160 if (text1.equals(text2)) { 161 diffs = new LinkedList<Diff>(); 162 diffs.add(new Diff(Operation.EQUAL, text1)); 163 return diffs; 164 } 165 166 // Trim off common prefix (speedup). 167 int commonlength = diff_commonPrefix(text1, text2); 168 String commonprefix = text1.substring(0, commonlength); 169 text1 = text1.substring(commonlength); 170 text2 = text2.substring(commonlength); 171 172 // Trim off common suffix (speedup). 173 commonlength = diff_commonSuffix(text1, text2); 174 String commonsuffix = text1.substring(text1.length() - commonlength); 175 text1 = text1.substring(0, text1.length() - commonlength); 176 text2 = text2.substring(0, text2.length() - commonlength); 177 178 // Compute the diff on the middle block. 179 diffs = diff_compute(text1, text2, checklines); 180 181 // Restore the prefix and suffix. 182 if (commonprefix.length() != 0) { 183 diffs.addFirst(new Diff(Operation.EQUAL, commonprefix)); 184 } 185 if (commonsuffix.length() != 0) { 186 diffs.addLast(new Diff(Operation.EQUAL, commonsuffix)); 187 } 188 189 diff_cleanupMerge(diffs); 190 return diffs; 191 } 192 193 194 /** 195 * Find the differences between two texts. Assumes that the texts do not 196 * have any common prefix or suffix. 197 * @param text1 Old string to be diffed. 198 * @param text2 New string to be diffed. 199 * @param checklines Speedup flag. If false, then don't run a 200 * line-level diff first to identify the changed areas. 201 * If true, then run a faster slightly less optimal diff 202 * @return Linked List of Diff objects. 203 */ 204 protected LinkedList<Diff> diff_compute(String text1, String text2, 205 boolean checklines) { 206 LinkedList<Diff> diffs = new LinkedList<Diff>(); 207 208 if (text1.length() == 0) { 209 // Just add some text (speedup). 210 diffs.add(new Diff(Operation.INSERT, text2)); 211 return diffs; 212 } 213 214 if (text2.length() == 0) { 215 // Just delete some text (speedup). 216 diffs.add(new Diff(Operation.DELETE, text1)); 217 return diffs; 218 } 219 220 String longtext = text1.length() > text2.length() ? text1 : text2; 221 String shorttext = text1.length() > text2.length() ? text2 : text1; 222 int i = longtext.indexOf(shorttext); 223 if (i != -1) { 224 // Shorter text is inside the longer text (speedup). 225 Operation op = (text1.length() > text2.length()) ? 226 Operation.DELETE : Operation.INSERT; 227 diffs.add(new Diff(op, longtext.substring(0, i))); 228 diffs.add(new Diff(Operation.EQUAL, shorttext)); 229 diffs.add(new Diff(op, longtext.substring(i + shorttext.length()))); 230 return diffs; 231 } 232 longtext = shorttext = null; // Garbage collect. 233 234 // Check to see if the problem can be split in two. 235 String[] hm = diff_halfMatch(text1, text2); 236 if (hm != null) { 237 // A half-match was found, sort out the return data. 238 String text1_a = hm[0]; 239 String text1_b = hm[1]; 240 String text2_a = hm[2]; 241 String text2_b = hm[3]; 242 String mid_common = hm[4]; 243 // Send both pairs off for separate processing. 244 LinkedList<Diff> diffs_a = diff_main(text1_a, text2_a, checklines); 245 LinkedList<Diff> diffs_b = diff_main(text1_b, text2_b, checklines); 246 // Merge the results. 247 diffs = diffs_a; 248 diffs.add(new Diff(Operation.EQUAL, mid_common)); 249 diffs.addAll(diffs_b); 250 return diffs; 251 } 252 253 // Perform a real diff. 254 if (checklines && (text1.length() < 100 || text2.length() < 100)) { 255 checklines = false; // Too trivial for the overhead. 256 } 257 List<String> linearray = null; 258 if (checklines) { 259 // Scan the text on a line-by-line basis first. 260 LinesToCharsResult b = diff_linesToChars(text1, text2); 261 text1 = b.chars1; 262 text2 = b.chars2; 263 linearray = b.lineArray; 264 } 265 266 diffs = diff_map(text1, text2); 267 if (diffs == null) { 268 // No acceptable result. 269 diffs = new LinkedList<Diff>(); 270 diffs.add(new Diff(Operation.DELETE, text1)); 271 diffs.add(new Diff(Operation.INSERT, text2)); 272 } 273 274 if (checklines) { 275 // Convert the diff back to original text. 276 diff_charsToLines(diffs, linearray); 277 // Eliminate freak matches (e.g. blank lines) 278 diff_cleanupSemantic(diffs); 279 280 // Rediff any replacement blocks, this time character-by-character. 281 // Add a dummy entry at the end. 282 diffs.add(new Diff(Operation.EQUAL, "")); 283 int count_delete = 0; 284 int count_insert = 0; 285 String text_delete = ""; 286 String text_insert = ""; 287 ListIterator<Diff> pointer = diffs.listIterator(); 288 Diff thisDiff = pointer.next(); 289 while (thisDiff != null) { 290 switch (thisDiff.operation) { 291 case INSERT: 292 count_insert++; 293 text_insert += thisDiff.text; 294 break; 295 case DELETE: 296 count_delete++; 297 text_delete += thisDiff.text; 298 break; 299 case EQUAL: 300 // Upon reaching an equality, check for prior redundancies. 301 if (count_delete >= 1 && count_insert >= 1) { 302 // Delete the offending records and add the merged ones. 303 pointer.previous(); 304 for (int j = 0; j < count_delete + count_insert; j++) { 305 pointer.previous(); 306 pointer.remove(); 307 } 308 for (Diff newDiff : diff_main(text_delete, text_insert, false)) { 309 pointer.add(newDiff); 310 } 311 } 312 count_insert = 0; 313 count_delete = 0; 314 text_delete = ""; 315 text_insert = ""; 316 break; 317 } 318 thisDiff = pointer.hasNext() ? pointer.next() : null; 319 } 320 diffs.removeLast(); // Remove the dummy entry at the end. 321 } 322 return diffs; 323 } 324 325 326 /** 327 * Split two texts into a list of strings. Reduce the texts to a string of 328 * hashes where each Unicode character represents one line. 329 * @param text1 First string. 330 * @param text2 Second string. 331 * @return An object containing the encoded text1, the encoded text2 and 332 * the List of unique strings. The zeroth element of the List of 333 * unique strings is intentionally blank. 334 */ 335 protected LinesToCharsResult diff_linesToChars(String text1, String text2) { 336 List<String> lineArray = new ArrayList<String>(); 337 Map<String, Integer> lineHash = new HashMap<String, Integer>(); 338 // e.g. linearray[4] == "Hello\n" 339 // e.g. linehash.get("Hello\n") == 4 340 341 // "\x00" is a valid character, but various debuggers don't like it. 342 // So we'll insert a junk entry to avoid generating a null character. 343 lineArray.add(""); 344 345 String chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); 346 String chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); 347 return new LinesToCharsResult(chars1, chars2, lineArray); 348 } 349 350 351 /** 352 * Split a text into a list of strings. Reduce the texts to a string of 353 * hashes where each Unicode character represents one line. 354 * @param text String to encode. 355 * @param lineArray List of unique strings. 356 * @param lineHash Map of strings to indices. 357 * @return Encoded string. 358 */ 359 private String diff_linesToCharsMunge(String text, List<String> lineArray, 360 Map<String, Integer> lineHash) { 361 int lineStart = 0; 362 int lineEnd = -1; 363 String line; 364 StringBuilder chars = new StringBuilder(); 365 // Walk the text, pulling out a substring for each line. 366 // text.split('\n') would would temporarily double our memory footprint. 367 // Modifying text would create many large strings to garbage collect. 368 while (lineEnd < text.length() - 1) { 369 lineEnd = text.indexOf('\n', lineStart); 370 if (lineEnd == -1) { 371 lineEnd = text.length() - 1; 372 } 373 line = text.substring(lineStart, lineEnd + 1); 374 lineStart = lineEnd + 1; 375 376 if (lineHash.containsKey(line)) { 377 chars.append(String.valueOf((char) (int) lineHash.get(line))); 378 } else { 379 lineArray.add(line); 380 lineHash.put(line, lineArray.size() - 1); 381 chars.append(String.valueOf((char) (lineArray.size() - 1))); 382 } 383 } 384 return chars.toString(); 385 } 386 387 388 /** 389 * Rehydrate the text in a diff from a string of line hashes to real lines of 390 * text. 391 * @param diffs LinkedList of Diff objects. 392 * @param lineArray List of unique strings. 393 */ 394 protected void diff_charsToLines(LinkedList<Diff> diffs, 395 List<String> lineArray) { 396 StringBuilder text; 397 for (Diff diff : diffs) { 398 text = new StringBuilder(); 399 for (int y = 0; y < diff.text.length(); y++) { 400 text.append(lineArray.get(diff.text.charAt(y))); 401 } 402 diff.text = text.toString(); 403 } 404 } 405 406 407 /** 408 * Explore the intersection points between the two texts. 409 * @param text1 Old string to be diffed. 410 * @param text2 New string to be diffed. 411 * @return LinkedList of Diff objects or null if no diff available. 412 */ 413 protected LinkedList<Diff> diff_map(String text1, String text2) { 414 long ms_end = System.currentTimeMillis() + (long) (Diff_Timeout * 1000); 415 // Cache the text lengths to prevent multiple calls. 416 int text1_length = text1.length(); 417 int text2_length = text2.length(); 418 int max_d = text1_length + text2_length - 1; 419 boolean doubleEnd = Diff_DualThreshold * 2 < max_d; 420 List<Set<Long>> v_map1 = new ArrayList<Set<Long>>(); 421 List<Set<Long>> v_map2 = new ArrayList<Set<Long>>(); 422 Map<Integer, Integer> v1 = new HashMap<Integer, Integer>(); 423 Map<Integer, Integer> v2 = new HashMap<Integer, Integer>(); 424 v1.put(1, 0); 425 v2.put(1, 0); 426 int x, y; 427 Long footstep = 0L; // Used to track overlapping paths. 428 Map<Long, Integer> footsteps = new HashMap<Long, Integer>(); 429 boolean done = false; 430 // If the total number of characters is odd, then the front path will 431 // collide with the reverse path. 432 boolean front = ((text1_length + text2_length) % 2 == 1); 433 for (int d = 0; d < max_d; d++) { 434 // Bail out if timeout reached. 435 if (Diff_Timeout > 0 && System.currentTimeMillis() > ms_end) { 436 return null; 437 } 438 439 // Walk the front path one step. 440 v_map1.add(new HashSet<Long>()); // Adds at index 'd'. 441 for (int k = -d; k <= d; k += 2) { 442 if (k == -d || k != d && v1.get(k - 1) < v1.get(k + 1)) { 443 x = v1.get(k + 1); 444 } else { 445 x = v1.get(k - 1) + 1; 446 } 447 y = x - k; 448 if (doubleEnd) { 449 footstep = diff_footprint(x, y); 450 if (front && (footsteps.containsKey(footstep))) { 451 done = true; 452 } 453 if (!front) { 454 footsteps.put(footstep, d); 455 } 456 } 457 while (!done && x < text1_length && y < text2_length 458 && text1.charAt(x) == text2.charAt(y)) { 459 x++; 460 y++; 461 if (doubleEnd) { 462 footstep = diff_footprint(x, y); 463 if (front && (footsteps.containsKey(footstep))) { 464 done = true; 465 } 466 if (!front) { 467 footsteps.put(footstep, d); 468 } 469 } 470 } 471 v1.put(k, x); 472 v_map1.get(d).add(diff_footprint(x, y)); 473 if (x == text1_length && y == text2_length) { 474 // Reached the end in single-path mode. 475 return diff_path1(v_map1, text1, text2); 476 } else if (done) { 477 // Front path ran over reverse path. 478 v_map2 = v_map2.subList(0, footsteps.get(footstep) + 1); 479 LinkedList<Diff> a = diff_path1(v_map1, text1.substring(0, x), 480 text2.substring(0, y)); 481 a.addAll(diff_path2(v_map2, text1.substring(x), text2.substring(y))); 482 return a; 483 } 484 } 485 486 if (doubleEnd) { 487 // Walk the reverse path one step. 488 v_map2.add(new HashSet<Long>()); // Adds at index 'd'. 489 for (int k = -d; k <= d; k += 2) { 490 if (k == -d || k != d && v2.get(k - 1) < v2.get(k + 1)) { 491 x = v2.get(k + 1); 492 } else { 493 x = v2.get(k - 1) + 1; 494 } 495 y = x - k; 496 footstep = diff_footprint(text1_length - x, text2_length - y); 497 if (!front && (footsteps.containsKey(footstep))) { 498 done = true; 499 } 500 if (front) { 501 footsteps.put(footstep, d); 502 } 503 while (!done && x < text1_length && y < text2_length 504 && text1.charAt(text1_length - x - 1) 505 == text2.charAt(text2_length - y - 1)) { 506 x++; 507 y++; 508 footstep = diff_footprint(text1_length - x, text2_length - y); 509 if (!front && (footsteps.containsKey(footstep))) { 510 done = true; 511 } 512 if (front) { 513 footsteps.put(footstep, d); 514 } 515 } 516 v2.put(k, x); 517 v_map2.get(d).add(diff_footprint(x, y)); 518 if (done) { 519 // Reverse path ran over front path. 520 v_map1 = v_map1.subList(0, footsteps.get(footstep) + 1); 521 LinkedList<Diff> a 522 = diff_path1(v_map1, text1.substring(0, text1_length - x), 523 text2.substring(0, text2_length - y)); 524 a.addAll(diff_path2(v_map2, text1.substring(text1_length - x), 525 text2.substring(text2_length - y))); 526 return a; 527 } 528 } 529 } 530 } 531 // Number of diffs equals number of characters, no commonality at all. 532 return null; 533 } 534 535 536 /** 537 * Work from the middle back to the start to determine the path. 538 * @param v_map List of path sets. 539 * @param text1 Old string fragment to be diffed. 540 * @param text2 New string fragment to be diffed. 541 * @return LinkedList of Diff objects. 542 */ 543 protected LinkedList<Diff> diff_path1(List<Set<Long>> v_map, 544 String text1, String text2) { 545 LinkedList<Diff> path = new LinkedList<Diff>(); 546 int x = text1.length(); 547 int y = text2.length(); 548 Operation last_op = null; 549 for (int d = v_map.size() - 2; d >= 0; d--) { 550 while (true) { 551 if (v_map.get(d).contains(diff_footprint(x - 1, y))) { 552 x--; 553 if (last_op == Operation.DELETE) { 554 path.getFirst().text = text1.charAt(x) + path.getFirst().text; 555 } else { 556 path.addFirst(new Diff(Operation.DELETE, 557 text1.substring(x, x + 1))); 558 } 559 last_op = Operation.DELETE; 560 break; 561 } else if (v_map.get(d).contains(diff_footprint(x, y - 1))) { 562 y--; 563 if (last_op == Operation.INSERT) { 564 path.getFirst().text = text2.charAt(y) + path.getFirst().text; 565 } else { 566 path.addFirst(new Diff(Operation.INSERT, 567 text2.substring(y, y + 1))); 568 } 569 last_op = Operation.INSERT; 570 break; 571 } else { 572 x--; 573 y--; 574 assert (text1.charAt(x) == text2.charAt(y)) 575 : "No diagonal. Can't happen. (diff_path1)"; 576 if (last_op == Operation.EQUAL) { 577 path.getFirst().text = text1.charAt(x) + path.getFirst().text; 578 } else { 579 path.addFirst(new Diff(Operation.EQUAL, text1.substring(x, x + 1))); 580 } 581 last_op = Operation.EQUAL; 582 } 583 } 584 } 585 return path; 586 } 587 588 589 /** 590 * Work from the middle back to the end to determine the path. 591 * @param v_map List of path sets. 592 * @param text1 Old string fragment to be diffed. 593 * @param text2 New string fragment to be diffed. 594 * @return LinkedList of Diff objects. 595 */ 596 protected LinkedList<Diff> diff_path2(List<Set<Long>> v_map, 597 String text1, String text2) { 598 LinkedList<Diff> path = new LinkedList<Diff>(); 599 int x = text1.length(); 600 int y = text2.length(); 601 Operation last_op = null; 602 for (int d = v_map.size() - 2; d >= 0; d--) { 603 while (true) { 604 if (v_map.get(d).contains(diff_footprint(x - 1, y))) { 605 x--; 606 if (last_op == Operation.DELETE) { 607 path.getLast().text += text1.charAt(text1.length() - x - 1); 608 } else { 609 path.addLast(new Diff(Operation.DELETE, 610 text1.substring(text1.length() - x - 1, text1.length() - x))); 611 } 612 last_op = Operation.DELETE; 613 break; 614 } else if (v_map.get(d).contains(diff_footprint(x, y - 1))) { 615 y--; 616 if (last_op == Operation.INSERT) { 617 path.getLast().text += text2.charAt(text2.length() - y - 1); 618 } else { 619 path.addLast(new Diff(Operation.INSERT, 620 text2.substring(text2.length() - y - 1, text2.length() - y))); 621 } 622 last_op = Operation.INSERT; 623 break; 624 } else { 625 x--; 626 y--; 627 assert (text1.charAt(text1.length() - x - 1) 628 == text2.charAt(text2.length() - y - 1)) 629 : "No diagonal. Can't happen. (diff_path2)"; 630 if (last_op == Operation.EQUAL) { 631 path.getLast().text += text1.charAt(text1.length() - x - 1); 632 } else { 633 path.addLast(new Diff(Operation.EQUAL, 634 text1.substring(text1.length() - x - 1, text1.length() - x))); 635 } 636 last_op = Operation.EQUAL; 637 } 638 } 639 } 640 return path; 641 } 642 643 644 /** 645 * Compute a good hash of two integers. 646 * @param x First int. 647 * @param y Second int. 648 * @return A long made up of both ints. 649 */ 650 protected long diff_footprint(int x, int y) { 651 // The maximum size for a long is 9,223,372,036,854,775,807 652 // The maximum size for an int is 2,147,483,647 653 // Two ints fit nicely in one long. 654 long result = x; 655 result = result << 32; 656 result += y; 657 return result; 658 } 659 660 661 /** 662 * Determine the common prefix of two strings 663 * @param text1 First string. 664 * @param text2 Second string. 665 * @return The number of characters common to the start of each string. 666 */ 667 public int diff_commonPrefix(String text1, String text2) { 668 // Performance analysis: http://neil.fraser.name/news/2007/10/09/ 669 int n = Math.min(text1.length(), text2.length()); 670 for (int i = 0; i < n; i++) { 671 if (text1.charAt(i) != text2.charAt(i)) { 672 return i; 673 } 674 } 675 return n; 676 } 677 678 679 /** 680 * Determine the common suffix of two strings 681 * @param text1 First string. 682 * @param text2 Second string. 683 * @return The number of characters common to the end of each string. 684 */ 685 public int diff_commonSuffix(String text1, String text2) { 686 // Performance analysis: http://neil.fraser.name/news/2007/10/09/ 687 int text1_length = text1.length(); 688 int text2_length = text2.length(); 689 int n = Math.min(text1_length, text2_length); 690 for (int i = 1; i <= n; i++) { 691 if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i)) { 692 return i - 1; 693 } 694 } 695 return n; 696 } 697 698 699 /** 700 * Do the two texts share a substring which is at least half the length of 701 * the longer text? 702 * @param text1 First string. 703 * @param text2 Second string. 704 * @return Five element String array, containing the prefix of text1, the 705 * suffix of text1, the prefix of text2, the suffix of text2 and the 706 * common middle. Or null if there was no match. 707 */ 708 protected String[] diff_halfMatch(String text1, String text2) { 709 String longtext = text1.length() > text2.length() ? text1 : text2; 710 String shorttext = text1.length() > text2.length() ? text2 : text1; 711 if (longtext.length() < 10 || shorttext.length() < 1) { 712 return null; // Pointless. 713 } 714 715 // First check if the second quarter is the seed for a half-match. 716 String[] hm1 = diff_halfMatchI(longtext, shorttext, 717 (longtext.length() + 3) / 4); 718 // Check again based on the third quarter. 719 String[] hm2 = diff_halfMatchI(longtext, shorttext, 720 (longtext.length() + 1) / 2); 721 String[] hm; 722 if (hm1 == null && hm2 == null) { 723 return null; 724 } else if (hm2 == null) { 725 hm = hm1; 726 } else if (hm1 == null) { 727 hm = hm2; 728 } else { 729 // Both matched. Select the longest. 730 hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; 731 } 732 733 // A half-match was found, sort out the return data. 734 if (text1.length() > text2.length()) { 735 return hm; 736 //return new String[]{hm[0], hm[1], hm[2], hm[3], hm[4]}; 737 } else { 738 return new String[]{hm[2], hm[3], hm[0], hm[1], hm[4]}; 739 } 740 } 741 742 743 /** 744 * Does a substring of shorttext exist within longtext such that the 745 * substring is at least half the length of longtext? 746 * @param longtext Longer string. 747 * @param shorttext Shorter string. 748 * @param i Start index of quarter length substring within longtext. 749 * @return Five element String array, containing the prefix of longtext, the 750 * suffix of longtext, the prefix of shorttext, the suffix of shorttext 751 * and the common middle. Or null if there was no match. 752 */ 753 private String[] diff_halfMatchI(String longtext, String shorttext, int i) { 754 // Start with a 1/4 length substring at position i as a seed. 755 String seed = longtext.substring(i, i + longtext.length() / 4); 756 int j = -1; 757 String best_common = ""; 758 String best_longtext_a = "", best_longtext_b = ""; 759 String best_shorttext_a = "", best_shorttext_b = ""; 760 while ((j = shorttext.indexOf(seed, j + 1)) != -1) { 761 int prefixLength = diff_commonPrefix(longtext.substring(i), 762 shorttext.substring(j)); 763 int suffixLength = diff_commonSuffix(longtext.substring(0, i), 764 shorttext.substring(0, j)); 765 if (best_common.length() < suffixLength + prefixLength) { 766 best_common = shorttext.substring(j - suffixLength, j) 767 + shorttext.substring(j, j + prefixLength); 768 best_longtext_a = longtext.substring(0, i - suffixLength); 769 best_longtext_b = longtext.substring(i + prefixLength); 770 best_shorttext_a = shorttext.substring(0, j - suffixLength); 771 best_shorttext_b = shorttext.substring(j + prefixLength); 772 } 773 } 774 if (best_common.length() >= longtext.length() / 2) { 775 return new String[]{best_longtext_a, best_longtext_b, 776 best_shorttext_a, best_shorttext_b, best_common}; 777 } else { 778 return null; 779 } 780 } 781 782 783 /** 784 * Reduce the number of edits by eliminating semantically trivial equalities. 785 * @param diffs LinkedList of Diff objects. 786 */ 787 public void diff_cleanupSemantic(LinkedList<Diff> diffs) { 788 if (diffs.isEmpty()) { 789 return; 790 } 791 boolean changes = false; 792 Stack<Diff> equalities = new Stack<Diff>(); // Stack of qualities. 793 String lastequality = null; // Always equal to equalities.lastElement().text 794 ListIterator<Diff> pointer = diffs.listIterator(); 795 // Number of characters that changed prior to the equality. 796 int length_changes1 = 0; 797 // Number of characters that changed after the equality. 798 int length_changes2 = 0; 799 Diff thisDiff = pointer.next(); 800 while (thisDiff != null) { 801 if (thisDiff.operation == Operation.EQUAL) { 802 // equality found 803 equalities.push(thisDiff); 804 length_changes1 = length_changes2; 805 length_changes2 = 0; 806 lastequality = thisDiff.text; 807 } else { 808 // an insertion or deletion 809 length_changes2 += thisDiff.text.length(); 810 if (lastequality != null && (lastequality.length() <= length_changes1) 811 && (lastequality.length() <= length_changes2)) { 812 //System.out.println("Splitting: '" + lastequality + "'"); 813 // Walk back to offending equality. 814 while (thisDiff != equalities.lastElement()) { 815 thisDiff = pointer.previous(); 816 } 817 pointer.next(); 818 819 // Replace equality with a delete. 820 pointer.set(new Diff(Operation.DELETE, lastequality)); 821 // Insert a corresponding an insert. 822 pointer.add(new Diff(Operation.INSERT, lastequality)); 823 824 equalities.pop(); // Throw away the equality we just deleted. 825 if (!equalities.empty()) { 826 // Throw away the previous equality (it needs to be reevaluated). 827 equalities.pop(); 828 } 829 if (equalities.empty()) { 830 // There are no previous equalities, walk back to the start. 831 while (pointer.hasPrevious()) { 832 pointer.previous(); 833 } 834 } else { 835 // There is a safe equality we can fall back to. 836 thisDiff = equalities.lastElement(); 837 while (thisDiff != pointer.previous()) { 838 // Intentionally empty loop. 839 } 840 } 841 842 length_changes1 = 0; // Reset the counters. 843 length_changes2 = 0; 844 lastequality = null; 845 changes = true; 846 } 847 } 848 thisDiff = pointer.hasNext() ? pointer.next() : null; 849 } 850 851 if (changes) { 852 diff_cleanupMerge(diffs); 853 } 854 diff_cleanupSemanticLossless(diffs); 855 } 856 857 858 /** 859 * Look for single edits surrounded on both sides by equalities 860 * which can be shifted sideways to align the edit to a word boundary. 861 * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. 862 * @param diffs LinkedList of Diff objects. 863 */ 864 public void diff_cleanupSemanticLossless(LinkedList<Diff> diffs) { 865 String equality1, edit, equality2; 866 String commonString; 867 int commonOffset; 868 int score, bestScore; 869 String bestEquality1, bestEdit, bestEquality2; 870 // Create a new iterator at the start. 871 ListIterator<Diff> pointer = diffs.listIterator(); 872 Diff prevDiff = pointer.hasNext() ? pointer.next() : null; 873 Diff thisDiff = pointer.hasNext() ? pointer.next() : null; 874 Diff nextDiff = pointer.hasNext() ? pointer.next() : null; 875 // Intentionally ignore the first and last element (don't need checking). 876 while (nextDiff != null) { 877 if (prevDiff.operation == Operation.EQUAL && 878 nextDiff.operation == Operation.EQUAL) { 879 // This is a single edit surrounded by equalities. 880 equality1 = prevDiff.text; 881 edit = thisDiff.text; 882 equality2 = nextDiff.text; 883 884 // First, shift the edit as far left as possible. 885 commonOffset = diff_commonSuffix(equality1, edit); 886 if (commonOffset != 0) { 887 commonString = edit.substring(edit.length() - commonOffset); 888 equality1 = equality1.substring(0, equality1.length() - commonOffset); 889 edit = commonString + edit.substring(0, edit.length() - commonOffset); 890 equality2 = commonString + equality2; 891 } 892 893 // Second, step character by character right, looking for the best fit. 894 bestEquality1 = equality1; 895 bestEdit = edit; 896 bestEquality2 = equality2; 897 bestScore = diff_cleanupSemanticScore(equality1, edit) 898 + diff_cleanupSemanticScore(edit, equality2); 899 while (edit.length() != 0 && equality2.length() != 0 900 && edit.charAt(0) == equality2.charAt(0)) { 901 equality1 += edit.charAt(0); 902 edit = edit.substring(1) + equality2.charAt(0); 903 equality2 = equality2.substring(1); 904 score = diff_cleanupSemanticScore(equality1, edit) 905 + diff_cleanupSemanticScore(edit, equality2); 906 // The >= encourages trailing rather than leading whitespace on edits. 907 if (score >= bestScore) { 908 bestScore = score; 909 bestEquality1 = equality1; 910 bestEdit = edit; 911 bestEquality2 = equality2; 912 } 913 } 914 915 if (!prevDiff.text.equals(bestEquality1)) { 916 // We have an improvement, save it back to the diff. 917 if (bestEquality1.length() != 0) { 918 prevDiff.text = bestEquality1; 919 } else { 920 pointer.previous(); // Walk past nextDiff. 921 pointer.previous(); // Walk past thisDiff. 922 pointer.previous(); // Walk past prevDiff. 923 pointer.remove(); // Delete prevDiff. 924 pointer.next(); // Walk past thisDiff. 925 pointer.next(); // Walk past nextDiff. 926 } 927 thisDiff.text = bestEdit; 928 if (bestEquality2.length() != 0) { 929 nextDiff.text = bestEquality2; 930 } else { 931 pointer.remove(); // Delete nextDiff. 932 nextDiff = thisDiff; 933 thisDiff = prevDiff; 934 } 935 } 936 } 937 prevDiff = thisDiff; 938 thisDiff = nextDiff; 939 nextDiff = pointer.hasNext() ? pointer.next() : null; 940 } 941 } 942 943 944 /** 945 * Given two strings, compute a score representing whether the internal 946 * boundary falls on logical boundaries. 947 * Scores range from 5 (best) to 0 (worst). 948 * @param one First string. 949 * @param two Second string. 950 * @return The score. 951 */ 952 private int diff_cleanupSemanticScore(String one, String two) { 953 if (one.length() == 0 || two.length() == 0) { 954 // Edges are the best. 955 return 5; 956 } 957 958 // Each port of this function behaves slightly differently due to 959 // subtle differences in each language's definition of things like 960 // 'whitespace'. Since this function's purpose is largely cosmetic, 961 // the choice has been made to use each language's native features 962 // rather than force total conformity. 963 int score = 0; 964 // One point for non-alphanumeric. 965 if (!Character.isLetterOrDigit(one.charAt(one.length() - 1)) 966 || !Character.isLetterOrDigit(two.charAt(0))) { 967 score++; 968 // Two points for whitespace. 969 if (Character.isWhitespace(one.charAt(one.length() - 1)) 970 || Character.isWhitespace(two.charAt(0))) { 971 score++; 972 // Three points for line breaks. 973 if (Character.getType(one.charAt(one.length() - 1)) == Character.CONTROL 974 || Character.getType(two.charAt(0)) == Character.CONTROL) { 975 score++; 976 // Four points for blank lines. 977 if (BLANKLINEEND.matcher(one).find() 978 || BLANKLINESTART.matcher(two).find()) { 979 score++; 980 } 981 } 982 } 983 } 984 return score; 985 } 986 987 988 private Pattern BLANKLINEEND 989 = Pattern.compile("\\n\\r?\\n\\Z", Pattern.DOTALL); 990 private Pattern BLANKLINESTART 991 = Pattern.compile("\\A\\r?\\n\\r?\\n", Pattern.DOTALL); 992 993 994 /** 995 * Reduce the number of edits by eliminating operationally trivial equalities. 996 * @param diffs LinkedList of Diff objects. 997 */ 998 public void diff_cleanupEfficiency(LinkedList<Diff> diffs) { 999 if (diffs.isEmpty()) { 1000 return; 1001 } 1002 boolean changes = false; 1003 Stack<Diff> equalities = new Stack<Diff>(); // Stack of equalities. 1004 String lastequality = null; // Always equal to equalities.lastElement().text 1005 ListIterator<Diff> pointer = diffs.listIterator(); 1006 // Is there an insertion operation before the last equality. 1007 boolean pre_ins = false; 1008 // Is there a deletion operation before the last equality. 1009 boolean pre_del = false; 1010 // Is there an insertion operation after the last equality. 1011 boolean post_ins = false; 1012 // Is there a deletion operation after the last equality. 1013 boolean post_del = false; 1014 Diff thisDiff = pointer.next(); 1015 Diff safeDiff = thisDiff; // The last Diff that is known to be unsplitable. 1016 while (thisDiff != null) { 1017 if (thisDiff.operation == Operation.EQUAL) { 1018 // equality found 1019 if (thisDiff.text.length() < Diff_EditCost && (post_ins || post_del)) { 1020 // Candidate found. 1021 equalities.push(thisDiff); 1022 pre_ins = post_ins; 1023 pre_del = post_del; 1024 lastequality = thisDiff.text; 1025 } else { 1026 // Not a candidate, and can never become one. 1027 equalities.clear(); 1028 lastequality = null; 1029 safeDiff = thisDiff; 1030 } 1031 post_ins = post_del = false; 1032 } else { 1033 // an insertion or deletion 1034 if (thisDiff.operation == Operation.DELETE) { 1035 post_del = true; 1036 } else { 1037 post_ins = true; 1038 } 1039 /* 1040 * Five types to be split: 1041 * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del> 1042 * <ins>A</ins>X<ins>C</ins><del>D</del> 1043 * <ins>A</ins><del>B</del>X<ins>C</ins> 1044 * <ins>A</del>X<ins>C</ins><del>D</del> 1045 * <ins>A</ins><del>B</del>X<del>C</del> 1046 */ 1047 if (lastequality != null 1048 && ((pre_ins && pre_del && post_ins && post_del) 1049 || ((lastequality.length() < Diff_EditCost / 2) 1050 && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) 1051 + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3))) { 1052 //System.out.println("Splitting: '" + lastequality + "'"); 1053 // Walk back to offending equality. 1054 while (thisDiff != equalities.lastElement()) { 1055 thisDiff = pointer.previous(); 1056 } 1057 pointer.next(); 1058 1059 // Replace equality with a delete. 1060 pointer.set(new Diff(Operation.DELETE, lastequality)); 1061 // Insert a corresponding an insert. 1062 pointer.add(thisDiff = new Diff(Operation.INSERT, lastequality)); 1063 1064 equalities.pop(); // Throw away the equality we just deleted. 1065 lastequality = null; 1066 if (pre_ins && pre_del) { 1067 // No changes made which could affect previous entry, keep going. 1068 post_ins = post_del = true; 1069 equalities.clear(); 1070 safeDiff = thisDiff; 1071 } else { 1072 if (!equalities.empty()) { 1073 // Throw away the previous equality (it needs to be reevaluated). 1074 equalities.pop(); 1075 } 1076 if (equalities.empty()) { 1077 // There are no previous questionable equalities, 1078 // walk back to the last known safe diff. 1079 thisDiff = safeDiff; 1080 } else { 1081 // There is an equality we can fall back to. 1082 thisDiff = equalities.lastElement(); 1083 } 1084 while (thisDiff != pointer.previous()) { 1085 // Intentionally empty loop. 1086 } 1087 post_ins = post_del = false; 1088 } 1089 1090 changes = true; 1091 } 1092 } 1093 thisDiff = pointer.hasNext() ? pointer.next() : null; 1094 } 1095 1096 if (changes) { 1097 diff_cleanupMerge(diffs); 1098 } 1099 } 1100 1101 1102 /** 1103 * Reorder and merge like edit sections. Merge equalities. 1104 * Any edit section can move as long as it doesn't cross an equality. 1105 * @param diffs LinkedList of Diff objects. 1106 */ 1107 public void diff_cleanupMerge(LinkedList<Diff> diffs) { 1108 diffs.add(new Diff(Operation.EQUAL, "")); // Add a dummy entry at the end. 1109 ListIterator<Diff> pointer = diffs.listIterator(); 1110 int count_delete = 0; 1111 int count_insert = 0; 1112 String text_delete = ""; 1113 String text_insert = ""; 1114 Diff thisDiff = pointer.next(); 1115 Diff prevEqual = null; 1116 int commonlength; 1117 while (thisDiff != null) { 1118 switch (thisDiff.operation) { 1119 case INSERT: 1120 count_insert++; 1121 text_insert += thisDiff.text; 1122 prevEqual = null; 1123 break; 1124 case DELETE: 1125 count_delete++; 1126 text_delete += thisDiff.text; 1127 prevEqual = null; 1128 break; 1129 case EQUAL: 1130 if (count_delete != 0 || count_insert != 0) { 1131 // Delete the offending records. 1132 pointer.previous(); // Reverse direction. 1133 while (count_delete-- > 0) { 1134 pointer.previous(); 1135 pointer.remove(); 1136 } 1137 while (count_insert-- > 0) { 1138 pointer.previous(); 1139 pointer.remove(); 1140 } 1141 if (count_delete != 0 && count_insert != 0) { 1142 // Factor out any common prefixies. 1143 commonlength = diff_commonPrefix(text_insert, text_delete); 1144 if (commonlength != 0) { 1145 if (pointer.hasPrevious()) { 1146 thisDiff = pointer.previous(); 1147 assert thisDiff.operation == Operation.EQUAL 1148 : "Previous diff should have been an equality."; 1149 thisDiff.text += text_insert.substring(0, commonlength); 1150 pointer.next(); 1151 } else { 1152 pointer.add(new Diff(Operation.EQUAL, 1153 text_insert.substring(0, commonlength))); 1154 } 1155 text_insert = text_insert.substring(commonlength); 1156 text_delete = text_delete.substring(commonlength); 1157 } 1158 // Factor out any common suffixies. 1159 commonlength = diff_commonSuffix(text_insert, text_delete); 1160 if (commonlength != 0) { 1161 thisDiff = pointer.next(); 1162 thisDiff.text = text_insert.substring(text_insert.length() 1163 - commonlength) + thisDiff.text; 1164 text_insert = text_insert.substring(0, text_insert.length() 1165 - commonlength); 1166 text_delete = text_delete.substring(0, text_delete.length() 1167 - commonlength); 1168 pointer.previous(); 1169 } 1170 } 1171 // Insert the merged records. 1172 if (text_delete.length() != 0) { 1173 pointer.add(new Diff(Operation.DELETE, text_delete)); 1174 } 1175 if (text_insert.length() != 0) { 1176 pointer.add(new Diff(Operation.INSERT, text_insert)); 1177 } 1178 // Step forward to the equality. 1179 thisDiff = pointer.hasNext() ? pointer.next() : null; 1180 } else if (prevEqual != null) { 1181 // Merge this equality with the previous one. 1182 prevEqual.text += thisDiff.text; 1183 pointer.remove(); 1184 thisDiff = pointer.previous(); 1185 pointer.next(); // Forward direction 1186 } 1187 count_insert = 0; 1188 count_delete = 0; 1189 text_delete = ""; 1190 text_insert = ""; 1191 prevEqual = thisDiff; 1192 break; 1193 } 1194 thisDiff = pointer.hasNext() ? pointer.next() : null; 1195 } 1196 // System.out.println(diff); 1197 if (diffs.getLast().text.length() == 0) { 1198 diffs.removeLast(); // Remove the dummy entry at the end. 1199 } 1200 1201 /* 1202 * Second pass: look for single edits surrounded on both sides by equalities 1203 * which can be shifted sideways to eliminate an equality. 1204 * e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC 1205 */ 1206 boolean changes = false; 1207 // Create a new iterator at the start. 1208 // (As opposed to walking the current one back.) 1209 pointer = diffs.listIterator(); 1210 Diff prevDiff = pointer.hasNext() ? pointer.next() : null; 1211 thisDiff = pointer.hasNext() ? pointer.next() : null; 1212 Diff nextDiff = pointer.hasNext() ? pointer.next() : null; 1213 // Intentionally ignore the first and last element (don't need checking). 1214 while (nextDiff != null) { 1215 if (prevDiff.operation == Operation.EQUAL && 1216 nextDiff.operation == Operation.EQUAL) { 1217 // This is a single edit surrounded by equalities. 1218 if (thisDiff.text.endsWith(prevDiff.text)) { 1219 // Shift the edit over the previous equality. 1220 thisDiff.text = prevDiff.text 1221 + thisDiff.text.substring(0, thisDiff.text.length() 1222 - prevDiff.text.length()); 1223 nextDiff.text = prevDiff.text + nextDiff.text; 1224 pointer.previous(); // Walk past nextDiff. 1225 pointer.previous(); // Walk past thisDiff. 1226 pointer.previous(); // Walk past prevDiff. 1227 pointer.remove(); // Delete prevDiff. 1228 pointer.next(); // Walk past thisDiff. 1229 thisDiff = pointer.next(); // Walk past nextDiff. 1230 nextDiff = pointer.hasNext() ? pointer.next() : null; 1231 changes = true; 1232 } else if (thisDiff.text.startsWith(nextDiff.text)) { 1233 // Shift the edit over the next equality. 1234 prevDiff.text += nextDiff.text; 1235 thisDiff.text = thisDiff.text.substring(nextDiff.text.length()) 1236 + nextDiff.text; 1237 pointer.remove(); // Delete nextDiff. 1238 nextDiff = pointer.hasNext() ? pointer.next() : null; 1239 changes = true; 1240 } 1241 } 1242 prevDiff = thisDiff; 1243 thisDiff = nextDiff; 1244 nextDiff = pointer.hasNext() ? pointer.next() : null; 1245 } 1246 // If shifts were made, the diff needs reordering and another shift sweep. 1247 if (changes) { 1248 diff_cleanupMerge(diffs); 1249 } 1250 } 1251 1252 1253 /** 1254 * loc is a location in text1, compute and return the equivalent location in 1255 * text2. 1256 * e.g. "The cat" vs "The big cat", 1->1, 5->8 1257 * @param diffs LinkedList of Diff objects. 1258 * @param loc Location within text1. 1259 * @return Location within text2. 1260 */ 1261 public int diff_xIndex(LinkedList<Diff> diffs, int loc) { 1262 int chars1 = 0; 1263 int chars2 = 0; 1264 int last_chars1 = 0; 1265 int last_chars2 = 0; 1266 Diff lastDiff = null; 1267 for (Diff aDiff : diffs) { 1268 if (aDiff.operation != Operation.INSERT) { 1269 // Equality or deletion. 1270 chars1 += aDiff.text.length(); 1271 } 1272 if (aDiff.operation != Operation.DELETE) { 1273 // Equality or insertion. 1274 chars2 += aDiff.text.length(); 1275 } 1276 if (chars1 > loc) { 1277 // Overshot the location. 1278 lastDiff = aDiff; 1279 break; 1280 } 1281 last_chars1 = chars1; 1282 last_chars2 = chars2; 1283 } 1284 if (lastDiff != null && lastDiff.operation == Operation.DELETE) { 1285 // The location was deleted. 1286 return last_chars2; 1287 } 1288 // Add the remaining character length. 1289 return last_chars2 + (loc - last_chars1); 1290 } 1291 1292 1293 /** 1294 * Convert a Diff list into a pretty HTML report. 1295 * @param diffs LinkedList of Diff objects. 1296 * @return HTML representation. 1297 */ 1298 public String diff_prettyHtml(LinkedList<Diff> diffs) { 1299 StringBuilder html = new StringBuilder(); 1300 int i = 0; 1301 for (Diff aDiff : diffs) { 1302 String text = aDiff.text.replace("&", "&").replace("<", "<") 1303 .replace(">", ">").replace("\n", "¶<BR>"); 1304 switch (aDiff.operation) { 1305 case INSERT: 1306 html.append("<INS STYLE=\"background:#E6FFE6;\" TITLE=\"i=").append(i) 1307 .append("\">").append(text).append("</INS>"); 1308 break; 1309 case DELETE: 1310 html.append("<DEL STYLE=\"background:#FFE6E6;\" TITLE=\"i=").append(i) 1311 .append("\">").append(text).append("</DEL>"); 1312 break; 1313 case EQUAL: 1314 html.append("<SPAN TITLE=\"i=").append(i).append("\">").append(text) 1315 .append("</SPAN>"); 1316 break; 1317 } 1318 if (aDiff.operation != Operation.DELETE) { 1319 i += aDiff.text.length(); 1320 } 1321 } 1322 return html.toString(); 1323 } 1324 1325 1326 /** 1327 * Compute and return the source text (all equalities and deletions). 1328 * @param diffs LinkedList of Diff objects. 1329 * @return Source text. 1330 */ 1331 public String diff_text1(LinkedList<Diff> diffs) { 1332 StringBuilder text = new StringBuilder(); 1333 for (Diff aDiff : diffs) { 1334 if (aDiff.operation != Operation.INSERT) { 1335 text.append(aDiff.text); 1336 } 1337 } 1338 return text.toString(); 1339 } 1340 1341 1342 /** 1343 * Compute and return the destination text (all equalities and insertions). 1344 * @param diffs LinkedList of Diff objects. 1345 * @return Destination text. 1346 */ 1347 public String diff_text2(LinkedList<Diff> diffs) { 1348 StringBuilder text = new StringBuilder(); 1349 for (Diff aDiff : diffs) { 1350 if (aDiff.operation != Operation.DELETE) { 1351 text.append(aDiff.text); 1352 } 1353 } 1354 return text.toString(); 1355 } 1356 1357 1358 /** 1359 * Compute the Levenshtein distance; the number of inserted, deleted or 1360 * substituted characters. 1361 * @param diffs LinkedList of Diff objects. 1362 * @return Number of changes. 1363 */ 1364 public int diff_levenshtein(LinkedList<Diff> diffs) { 1365 int levenshtein = 0; 1366 int insertions = 0; 1367 int deletions = 0; 1368 for (Diff aDiff : diffs) { 1369 switch (aDiff.operation) { 1370 case INSERT: 1371 insertions += aDiff.text.length(); 1372 break; 1373 case DELETE: 1374 deletions += aDiff.text.length(); 1375 break; 1376 case EQUAL: 1377 // A deletion and an insertion is one substitution. 1378 levenshtein += Math.max(insertions, deletions); 1379 insertions = 0; 1380 deletions = 0; 1381 break; 1382 } 1383 } 1384 levenshtein += Math.max(insertions, deletions); 1385 return levenshtein; 1386 } 1387 1388 1389 /** 1390 * Crush the diff into an encoded string which describes the operations 1391 * required to transform text1 into text2. 1392 * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. 1393 * Operations are tab-separated. Inserted text is escaped using %xx notation. 1394 * @param diffs Array of diff tuples. 1395 * @return Delta text. 1396 */ 1397 public String diff_toDelta(LinkedList<Diff> diffs) { 1398 StringBuilder text = new StringBuilder(); 1399 for (Diff aDiff : diffs) { 1400 switch (aDiff.operation) { 1401 case INSERT: 1402 try { 1403 text.append("+").append(URLEncoder.encode(aDiff.text, "UTF-8") 1404 .replace('+', ' ')).append("\t"); 1405 } catch (UnsupportedEncodingException e) { 1406 // Not likely on modern system. 1407 throw new Error("This system does not support UTF-8.", e); 1408 } 1409 break; 1410 case DELETE: 1411 text.append("-").append(aDiff.text.length()).append("\t"); 1412 break; 1413 case EQUAL: 1414 text.append("=").append(aDiff.text.length()).append("\t"); 1415 break; 1416 } 1417 } 1418 String delta = text.toString(); 1419 if (delta.length() != 0) { 1420 // Strip off trailing tab character. 1421 delta = delta.substring(0, delta.length() - 1); 1422 delta = unescapeForEncodeUriCompatability(delta); 1423 } 1424 return delta; 1425 } 1426 1427 1428 /** 1429 * Given the original text1, and an encoded string which describes the 1430 * operations required to transform text1 into text2, compute the full diff. 1431 * @param text1 Source string for the diff. 1432 * @param delta Delta text. 1433 * @return Array of diff tuples or null if invalid. 1434 * @throws IllegalArgumentException If invalid input. 1435 */ 1436 public LinkedList<Diff> diff_fromDelta(String text1, String delta) 1437 throws IllegalArgumentException { 1438 LinkedList<Diff> diffs = new LinkedList<Diff>(); 1439 int pointer = 0; // Cursor in text1 1440 String[] tokens = delta.split("\t"); 1441 for (String token : tokens) { 1442 if (token.length() == 0) { 1443 // Blank tokens are ok (from a trailing \t). 1444 continue; 1445 } 1446 // Each token begins with a one character parameter which specifies the 1447 // operation of this token (delete, insert, equality). 1448 String param = token.substring(1); 1449 switch (token.charAt(0)) { 1450 case '+': 1451 // decode would change all "+" to " " 1452 param = param.replace("+", "%2B"); 1453 try { 1454 param = URLDecoder.decode(param, "UTF-8"); 1455 } catch (UnsupportedEncodingException e) { 1456 // Not likely on modern system. 1457 throw new Error("This system does not support UTF-8.", e); 1458 } catch (IllegalArgumentException e) { 1459 // Malformed URI sequence. 1460 throw new IllegalArgumentException( 1461 "Illegal escape in diff_fromDelta: " + param, e); 1462 } 1463 diffs.add(new Diff(Operation.INSERT, param)); 1464 break; 1465 case '-': 1466 // Fall through. 1467 case '=': 1468 int n; 1469 try { 1470 n = Integer.parseInt(param); 1471 } catch (NumberFormatException e) { 1472 throw new IllegalArgumentException( 1473 "Invalid number in diff_fromDelta: " + param, e); 1474 } 1475 if (n < 0) { 1476 throw new IllegalArgumentException( 1477 "Negative number in diff_fromDelta: " + param); 1478 } 1479 String text; 1480 try { 1481 text = text1.substring(pointer, pointer += n); 1482 } catch (StringIndexOutOfBoundsException e) { 1483 throw new IllegalArgumentException("Delta length (" + pointer 1484 + ") larger than source text length (" + text1.length() 1485 + ").", e); 1486 } 1487 if (token.charAt(0) == '=') { 1488 diffs.add(new Diff(Operation.EQUAL, text)); 1489 } else { 1490 diffs.add(new Diff(Operation.DELETE, text)); 1491 } 1492 break; 1493 default: 1494 // Anything else is an error. 1495 throw new IllegalArgumentException( 1496 "Invalid diff operation in diff_fromDelta: " + token.charAt(0)); 1497 } 1498 } 1499 if (pointer != text1.length()) { 1500 throw new IllegalArgumentException("Delta length (" + pointer 1501 + ") smaller than source text length (" + text1.length() + ")."); 1502 } 1503 return diffs; 1504 } 1505 1506 1507 // MATCH FUNCTIONS 1508 1509 1510 /** 1511 * Locate the best instance of 'pattern' in 'text' near 'loc'. 1512 * Returns -1 if no match found. 1513 * @param text The text to search. 1514 * @param pattern The pattern to search for. 1515 * @param loc The location to search around. 1516 * @return Best match index or -1. 1517 */ 1518 public int match_main(String text, String pattern, int loc) { 1519 // Check for null inputs. 1520 if (text == null || pattern == null) { 1521 throw new IllegalArgumentException("Null inputs. (match_main)"); 1522 } 1523 1524 loc = Math.max(0, Math.min(loc, text.length())); 1525 if (text.equals(pattern)) { 1526 // Shortcut (potentially not guaranteed by the algorithm) 1527 return 0; 1528 } else if (text.length() == 0) { 1529 // Nothing to match. 1530 return -1; 1531 } else if (loc + pattern.length() <= text.length() 1532 && text.substring(loc, loc + pattern.length()).equals(pattern)) { 1533 // Perfect match at the perfect spot! (Includes case of null pattern) 1534 return loc; 1535 } else { 1536 // Do a fuzzy compare. 1537 return match_bitap(text, pattern, loc); 1538 } 1539 } 1540 1541 1542 /** 1543 * Locate the best instance of 'pattern' in 'text' near 'loc' using the 1544 * Bitap algorithm. Returns -1 if no match found. 1545 * @param text The text to search. 1546 * @param pattern The pattern to search for. 1547 * @param loc The location to search around. 1548 * @return Best match index or -1. 1549 */ 1550 protected int match_bitap(String text, String pattern, int loc) { 1551 assert (Match_MaxBits == 0 || pattern.length() <= Match_MaxBits) 1552 : "Pattern too long for this application."; 1553 1554 // Initialise the alphabet. 1555 Map<Character, Integer> s = match_alphabet(pattern); 1556 1557 // Highest score beyond which we give up. 1558 double score_threshold = Match_Threshold; 1559 // Is there a nearby exact match? (speedup) 1560 int best_loc = text.indexOf(pattern, loc); 1561 if (best_loc != -1) { 1562 score_threshold = Math.min(match_bitapScore(0, best_loc, loc, pattern), 1563 score_threshold); 1564 // What about in the other direction? (speedup) 1565 best_loc = text.lastIndexOf(pattern, loc + pattern.length()); 1566 if (best_loc != -1) { 1567 score_threshold = Math.min(match_bitapScore(0, best_loc, loc, pattern), 1568 score_threshold); 1569 } 1570 } 1571 1572 // Initialise the bit arrays. 1573 int matchmask = 1 << (pattern.length() - 1); 1574 best_loc = -1; 1575 1576 int bin_min, bin_mid; 1577 int bin_max = pattern.length() + text.length(); 1578 // Empty initialization added to appease Java compiler. 1579 int[] last_rd = new int[0]; 1580 for (int d = 0; d < pattern.length(); d++) { 1581 // Scan for the best match; each iteration allows for one more error. 1582 // Run a binary search to determine how far from 'loc' we can stray at 1583 // this error level. 1584 bin_min = 0; 1585 bin_mid = bin_max; 1586 while (bin_min < bin_mid) { 1587 if (match_bitapScore(d, loc + bin_mid, loc, pattern) 1588 <= score_threshold) { 1589 bin_min = bin_mid; 1590 } else { 1591 bin_max = bin_mid; 1592 } 1593 bin_mid = (bin_max - bin_min) / 2 + bin_min; 1594 } 1595 // Use the result from this iteration as the maximum for the next. 1596 bin_max = bin_mid; 1597 int start = Math.max(1, loc - bin_mid + 1); 1598 int finish = Math.min(loc + bin_mid, text.length()) + pattern.length(); 1599 1600 int[] rd = new int[finish + 2]; 1601 rd[finish + 1] = (1 << d) - 1; 1602 for (int j = finish; j >= start; j--) { 1603 int charMatch; 1604 if (text.length() <= j - 1 || !s.containsKey(text.charAt(j - 1))) { 1605 // Out of range. 1606 charMatch = 0; 1607 } else { 1608 charMatch = s.get(text.charAt(j - 1)); 1609 } 1610 if (d == 0) { 1611 // First pass: exact match. 1612 rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; 1613 } else { 1614 // Subsequent passes: fuzzy match. 1615 rd[j] = ((rd[j + 1] << 1) | 1) & charMatch 1616 | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]; 1617 } 1618 if ((rd[j] & matchmask) != 0) { 1619 double score = match_bitapScore(d, j - 1, loc, pattern); 1620 // This match will almost certainly be better than any existing 1621 // match. But check anyway. 1622 if (score <= score_threshold) { 1623 // Told you so. 1624 score_threshold = score; 1625 best_loc = j - 1; 1626 if (best_loc > loc) { 1627 // When passing loc, don't exceed our current distance from loc. 1628 start = Math.max(1, 2 * loc - best_loc); 1629 } else { 1630 // Already passed loc, downhill from here on in. 1631 break; 1632 } 1633 } 1634 } 1635 } 1636 if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { 1637 // No hope for a (better) match at greater error levels. 1638 break; 1639 } 1640 last_rd = rd; 1641 } 1642 return best_loc; 1643 } 1644 1645 1646 /** 1647 * Compute and return the score for a match with e errors and x location. 1648 * @param e Number of errors in match. 1649 * @param x Location of match. 1650 * @param loc Expected location of match. 1651 * @param pattern Pattern being sought. 1652 * @return Overall score for match (0.0 = good, 1.0 = bad). 1653 */ 1654 private double match_bitapScore(int e, int x, int loc, String pattern) { 1655 float accuracy = (float) e / pattern.length(); 1656 int proximity = Math.abs(loc - x); 1657 if (Match_Distance == 0) { 1658 // Dodge divide by zero error. 1659 return proximity == 0 ? accuracy : 1.0; 1660 } 1661 return accuracy + (proximity / (float) Match_Distance); 1662 } 1663 1664 1665 /** 1666 * Initialise the alphabet for the Bitap algorithm. 1667 * @param pattern The text to encode. 1668 * @return Hash of character locations. 1669 */ 1670 protected Map<Character, Integer> match_alphabet(String pattern) { 1671 Map<Character, Integer> s = new HashMap<Character, Integer>(); 1672 char[] char_pattern = pattern.toCharArray(); 1673 for (char c : char_pattern) { 1674 s.put(c, 0); 1675 } 1676 int i = 0; 1677 for (char c : char_pattern) { 1678 s.put(c, s.get(c) | (1 << (pattern.length() - i - 1))); 1679 i++; 1680 } 1681 return s; 1682 } 1683 1684 1685 // PATCH FUNCTIONS 1686 1687 1688 /** 1689 * Increase the context until it is unique, 1690 * but don't let the pattern expand beyond Match_MaxBits. 1691 * @param patch The patch to grow. 1692 * @param text Source text. 1693 */ 1694 protected void patch_addContext(Patch patch, String text) { 1695 if (text.length() == 0) { 1696 return; 1697 } 1698 String pattern = text.substring(patch.start2, patch.start2 + patch.length1); 1699 int padding = 0; 1700 1701 // Look for the first and last matches of pattern in text. If two different 1702 // matches are found, increase the pattern length. 1703 while (text.indexOf(pattern) != text.lastIndexOf(pattern) 1704 && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) { 1705 padding += Patch_Margin; 1706 pattern = text.substring(Math.max(0, patch.start2 - padding), 1707 Math.min(text.length(), patch.start2 + patch.length1 + padding)); 1708 } 1709 // Add one chunk for good luck. 1710 padding += Patch_Margin; 1711 1712 // Add the prefix. 1713 String prefix = text.substring(Math.max(0, patch.start2 - padding), 1714 patch.start2); 1715 if (prefix.length() != 0) { 1716 patch.diffs.addFirst(new Diff(Operation.EQUAL, prefix)); 1717 } 1718 // Add the suffix. 1719 String suffix = text.substring(patch.start2 + patch.length1, 1720 Math.min(text.length(), patch.start2 + patch.length1 + padding)); 1721 if (suffix.length() != 0) { 1722 patch.diffs.addLast(new Diff(Operation.EQUAL, suffix)); 1723 } 1724 1725 // Roll back the start points. 1726 patch.start1 -= prefix.length(); 1727 patch.start2 -= prefix.length(); 1728 // Extend the lengths. 1729 patch.length1 += prefix.length() + suffix.length(); 1730 patch.length2 += prefix.length() + suffix.length(); 1731 } 1732 1733 1734 /** 1735 * Compute a list of patches to turn text1 into text2. 1736 * A set of diffs will be computed. 1737 * @param text1 Old text. 1738 * @param text2 New text. 1739 * @return LinkedList of Patch objects. 1740 */ 1741 public LinkedList<Patch> patch_make(String text1, String text2) { 1742 if (text1 == null || text2 == null) { 1743 throw new IllegalArgumentException("Null inputs. (patch_make)"); 1744 } 1745 // No diffs provided, compute our own. 1746 LinkedList<Diff> diffs = diff_main(text1, text2, true); 1747 if (diffs.size() > 2) { 1748 diff_cleanupSemantic(diffs); 1749 diff_cleanupEfficiency(diffs); 1750 } 1751 return patch_make(text1, diffs); 1752 } 1753 1754 1755 /** 1756 * Compute a list of patches to turn text1 into text2. 1757 * text1 will be derived from the provided diffs. 1758 * @param diffs Array of diff tuples for text1 to text2. 1759 * @return LinkedList of Patch objects. 1760 */ 1761 public LinkedList<Patch> patch_make(LinkedList<Diff> diffs) { 1762 if (diffs == null) { 1763 throw new IllegalArgumentException("Null inputs. (patch_make)"); 1764 } 1765 // No origin string provided, compute our own. 1766 String text1 = diff_text1(diffs); 1767 return patch_make(text1, diffs); 1768 } 1769 1770 1771 /** 1772 * Compute a list of patches to turn text1 into text2. 1773 * text2 is ignored, diffs are the delta between text1 and text2. 1774 * @param text1 Old text 1775 * @param text2 Ignored. 1776 * @param diffs Array of diff tuples for text1 to text2. 1777 * @return LinkedList of Patch objects. 1778 * @deprecated Prefer patch_make(String text1, LinkedList<Diff> diffs). 1779 */ 1780 public LinkedList<Patch> patch_make(String text1, String text2, 1781 LinkedList<Diff> diffs) { 1782 return patch_make(text1, diffs); 1783 } 1784 1785 1786 /** 1787 * Compute a list of patches to turn text1 into text2. 1788 * text2 is not provided, diffs are the delta between text1 and text2. 1789 * @param text1 Old text. 1790 * @param diffs Array of diff tuples for text1 to text2. 1791 * @return LinkedList of Patch objects. 1792 */ 1793 public LinkedList<Patch> patch_make(String text1, LinkedList<Diff> diffs) { 1794 if (text1 == null || diffs == null) { 1795 throw new IllegalArgumentException("Null inputs. (patch_make)"); 1796 } 1797 1798 LinkedList<Patch> patches = new LinkedList<Patch>(); 1799 if (diffs.isEmpty()) { 1800 return patches; // Get rid of the null case. 1801 } 1802 Patch patch = new Patch(); 1803 int char_count1 = 0; // Number of characters into the text1 string. 1804 int char_count2 = 0; // Number of characters into the text2 string. 1805 // Start with text1 (prepatch_text) and apply the diffs until we arrive at 1806 // text2 (postpatch_text). We recreate the patches one by one to determine 1807 // context info. 1808 String prepatch_text = text1; 1809 String postpatch_text = text1; 1810 for (Diff aDiff : diffs) { 1811 if (patch.diffs.isEmpty() && aDiff.operation != Operation.EQUAL) { 1812 // A new patch starts here. 1813 patch.start1 = char_count1; 1814 patch.start2 = char_count2; 1815 } 1816 1817 switch (aDiff.operation) { 1818 case INSERT: 1819 patch.diffs.add(aDiff); 1820 patch.length2 += aDiff.text.length(); 1821 postpatch_text = postpatch_text.substring(0, char_count2) 1822 + aDiff.text + postpatch_text.substring(char_count2); 1823 break; 1824 case DELETE: 1825 patch.length1 += aDiff.text.length(); 1826 patch.diffs.add(aDiff); 1827 postpatch_text = postpatch_text.substring(0, char_count2) 1828 + postpatch_text.substring(char_count2 + aDiff.text.length()); 1829 break; 1830 case EQUAL: 1831 if (aDiff.text.length() <= 2 * Patch_Margin 1832 && !patch.diffs.isEmpty() && aDiff != diffs.getLast()) { 1833 // Small equality inside a patch. 1834 patch.diffs.add(aDiff); 1835 patch.length1 += aDiff.text.length(); 1836 patch.length2 += aDiff.text.length(); 1837 } 1838 1839 if (aDiff.text.length() >= 2 * Patch_Margin) { 1840 // Time for a new patch. 1841 if (!patch.diffs.isEmpty()) { 1842 patch_addContext(patch, prepatch_text); 1843 patches.add(patch); 1844 patch = new Patch(); 1845 // Unlike Unidiff, our patch lists have a rolling context. 1846 // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff 1847 // Update prepatch text & pos to reflect the application of the 1848 // just completed patch. 1849 prepatch_text = postpatch_text; 1850 char_count1 = char_count2; 1851 } 1852 } 1853 break; 1854 } 1855 1856 // Update the current character count. 1857 if (aDiff.operation != Operation.INSERT) { 1858 char_count1 += aDiff.text.length(); 1859 } 1860 if (aDiff.operation != Operation.DELETE) { 1861 char_count2 += aDiff.text.length(); 1862 } 1863 } 1864 // Pick up the leftover patch if not empty. 1865 if (!patch.diffs.isEmpty()) { 1866 patch_addContext(patch, prepatch_text); 1867 patches.add(patch); 1868 } 1869 1870 return patches; 1871 } 1872 1873 1874 /** 1875 * Given an array of patches, return another array that is identical. 1876 * @param patches Array of patch objects. 1877 * @return Array of patch objects. 1878 */ 1879 public LinkedList<Patch> patch_deepCopy(LinkedList<Patch> patches) { 1880 LinkedList<Patch> patchesCopy = new LinkedList<Patch>(); 1881 for (Patch aPatch : patches) { 1882 Patch patchCopy = new Patch(); 1883 for (Diff aDiff : aPatch.diffs) { 1884 Diff diffCopy = new Diff(aDiff.operation, aDiff.text); 1885 patchCopy.diffs.add(diffCopy); 1886 } 1887 patchCopy.start1 = aPatch.start1; 1888 patchCopy.start2 = aPatch.start2; 1889 patchCopy.length1 = aPatch.length1; 1890 patchCopy.length2 = aPatch.length2; 1891 patchesCopy.add(patchCopy); 1892 } 1893 return patchesCopy; 1894 } 1895 1896 1897 /** 1898 * Merge a set of patches onto the text. Return a patched text, as well 1899 * as an array of true/false values indicating which patches were applied. 1900 * @param patches Array of patch objects 1901 * @param text Old text. 1902 * @return Two element Object array, containing the new text and an array of 1903 * boolean values. 1904 */ 1905 public Object[] patch_apply(LinkedList<Patch> patches, String text) { 1906 if (patches.isEmpty()) { 1907 return new Object[]{text, new boolean[0]}; 1908 } 1909 1910 // Deep copy the patches so that no changes are made to originals. 1911 patches = patch_deepCopy(patches); 1912 1913 String nullPadding = patch_addPadding(patches); 1914 text = nullPadding + text + nullPadding; 1915 patch_splitMax(patches); 1916 1917 int x = 0; 1918 // delta keeps track of the offset between the expected and actual location 1919 // of the previous patch. If there are patches expected at positions 10 and 1920 // 20, but the first patch was found at 12, delta is 2 and the second patch 1921 // has an effective expected position of 22. 1922 int delta = 0; 1923 boolean[] results = new boolean[patches.size()]; 1924 for (Patch aPatch : patches) { 1925 int expected_loc = aPatch.start2 + delta; 1926 String text1 = diff_text1(aPatch.diffs); 1927 int start_loc; 1928 int end_loc = -1; 1929 if (text1.length() > this.Match_MaxBits) { 1930 // patch_splitMax will only provide an oversized pattern in the case of 1931 // a monster delete. 1932 start_loc = match_main(text, 1933 text1.substring(0, this.Match_MaxBits), expected_loc); 1934 if (start_loc != -1) { 1935 end_loc = match_main(text, 1936 text1.substring(text1.length() - this.Match_MaxBits), 1937 expected_loc + text1.length() - this.Match_MaxBits); 1938 if (end_loc == -1 || start_loc >= end_loc) { 1939 // Can't find valid trailing context. Drop this patch. 1940 start_loc = -1; 1941 } 1942 } 1943 } else { 1944 start_loc = match_main(text, text1, expected_loc); 1945 } 1946 if (start_loc == -1) { 1947 // No match found. :( 1948 results[x] = false; 1949 // Subtract the delta for this failed patch from subsequent patches. 1950 delta -= aPatch.length2 - aPatch.length1; 1951 } else { 1952 // Found a match. :) 1953 results[x] = true; 1954 delta = start_loc - expected_loc; 1955 String text2; 1956 if (end_loc == -1) { 1957 text2 = text.substring(start_loc, 1958 Math.min(start_loc + text1.length(), text.length())); 1959 } else { 1960 text2 = text.substring(start_loc, 1961 Math.min(end_loc + this.Match_MaxBits, text.length())); 1962 } 1963 if (text1.equals(text2)) { 1964 // Perfect match, just shove the replacement text in. 1965 text = text.substring(0, start_loc) + diff_text2(aPatch.diffs) 1966 + text.substring(start_loc + text1.length()); 1967 } else { 1968 // Imperfect match. Run a diff to get a framework of equivalent 1969 // indices. 1970 LinkedList<Diff> diffs = diff_main(text1, text2, false); 1971 if (text1.length() > this.Match_MaxBits 1972 && diff_levenshtein(diffs) / (float) text1.length() 1973 > this.Patch_DeleteThreshold) { 1974 // The end points match, but the content is unacceptably bad. 1975 results[x] = false; 1976 } else { 1977 diff_cleanupSemanticLossless(diffs); 1978 int index1 = 0; 1979 for (Diff aDiff : aPatch.diffs) { 1980 if (aDiff.operation != Operation.EQUAL) { 1981 int index2 = diff_xIndex(diffs, index1); 1982 if (aDiff.operation == Operation.INSERT) { 1983 // Insertion 1984 text = text.substring(0, start_loc + index2) + aDiff.text 1985 + text.substring(start_loc + index2); 1986 } else if (aDiff.operation == Operation.DELETE) { 1987 // Deletion 1988 text = text.substring(0, start_loc + index2) 1989 + text.substring(start_loc + diff_xIndex(diffs, 1990 index1 + aDiff.text.length())); 1991 } 1992 } 1993 if (aDiff.operation != Operation.DELETE) { 1994 index1 += aDiff.text.length(); 1995 } 1996 } 1997 } 1998 } 1999 } 2000 x++; 2001 } 2002 // Strip the padding off. 2003 text = text.substring(nullPadding.length(), text.length() 2004 - nullPadding.length()); 2005 return new Object[]{text, results}; 2006 } 2007 2008 2009 /** 2010 * Add some padding on text start and end so that edges can match something. 2011 * Intended to be called only from within patch_apply. 2012 * @param patches Array of patch objects. 2013 * @return The padding string added to each side. 2014 */ 2015 public String patch_addPadding(LinkedList<Patch> patches) { 2016 int paddingLength = this.Patch_Margin; 2017 String nullPadding = ""; 2018 for (int x = 1; x <= paddingLength; x++) { 2019 nullPadding += String.valueOf((char) x); 2020 } 2021 2022 // Bump all the patches forward. 2023 for (Patch aPatch : patches) { 2024 aPatch.start1 += paddingLength; 2025 aPatch.start2 += paddingLength; 2026 } 2027 2028 // Add some padding on start of first diff. 2029 Patch patch = patches.getFirst(); 2030 LinkedList<Diff> diffs = patch.diffs; 2031 if (diffs.isEmpty() || diffs.getFirst().operation != Operation.EQUAL) { 2032 // Add nullPadding equality. 2033 diffs.addFirst(new Diff(Operation.EQUAL, nullPadding)); 2034 patch.start1 -= paddingLength; // Should be 0. 2035 patch.start2 -= paddingLength; // Should be 0. 2036 patch.length1 += paddingLength; 2037 patch.length2 += paddingLength; 2038 } else if (paddingLength > diffs.getFirst().text.length()) { 2039 // Grow first equality. 2040 Diff firstDiff = diffs.getFirst(); 2041 int extraLength = paddingLength - firstDiff.text.length(); 2042 firstDiff.text = nullPadding.substring(firstDiff.text.length()) 2043 + firstDiff.text; 2044 patch.start1 -= extraLength; 2045 patch.start2 -= extraLength; 2046 patch.length1 += extraLength; 2047 patch.length2 += extraLength; 2048 } 2049 2050 // Add some padding on end of last diff. 2051 patch = patches.getLast(); 2052 diffs = patch.diffs; 2053 if (diffs.isEmpty() || diffs.getLast().operation != Operation.EQUAL) { 2054 // Add nullPadding equality. 2055 diffs.addLast(new Diff(Operation.EQUAL, nullPadding)); 2056 patch.length1 += paddingLength; 2057 patch.length2 += paddingLength; 2058 } else if (paddingLength > diffs.getLast().text.length()) { 2059 // Grow last equality. 2060 Diff lastDiff = diffs.getLast(); 2061 int extraLength = paddingLength - lastDiff.text.length(); 2062 lastDiff.text += nullPadding.substring(0, extraLength); 2063 patch.length1 += extraLength; 2064 patch.length2 += extraLength; 2065 } 2066 2067 return nullPadding; 2068 } 2069 2070 2071 /** 2072 * Look through the patches and break up any which are longer than the 2073 * maximum limit of the match algorithm. 2074 * @param patches LinkedList of Patch objects. 2075 */ 2076 public void patch_splitMax(LinkedList<Patch> patches) { 2077 int patch_size; 2078 String precontext, postcontext; 2079 Patch patch; 2080 int start1, start2; 2081 boolean empty; 2082 Operation diff_type; 2083 String diff_text; 2084 ListIterator<Patch> pointer = patches.listIterator(); 2085 Patch bigpatch = pointer.hasNext() ? pointer.next() : null; 2086 while (bigpatch != null) { 2087 if (bigpatch.length1 <= Match_MaxBits) { 2088 bigpatch = pointer.hasNext() ? pointer.next() : null; 2089 continue; 2090 } 2091 // Remove the big old patch. 2092 pointer.remove(); 2093 patch_size = Match_MaxBits; 2094 start1 = bigpatch.start1; 2095 start2 = bigpatch.start2; 2096 precontext = ""; 2097 while (!bigpatch.diffs.isEmpty()) { 2098 // Create one of several smaller patches. 2099 patch = new Patch(); 2100 empty = true; 2101 patch.start1 = start1 - precontext.length(); 2102 patch.start2 = start2 - precontext.length(); 2103 if (precontext.length() != 0) { 2104 patch.length1 = patch.length2 = precontext.length(); 2105 patch.diffs.add(new Diff(Operation.EQUAL, precontext)); 2106 } 2107 while (!bigpatch.diffs.isEmpty() 2108 && patch.length1 < patch_size - Patch_Margin) { 2109 diff_type = bigpatch.diffs.getFirst().operation; 2110 diff_text = bigpatch.diffs.getFirst().text; 2111 if (diff_type == Operation.INSERT) { 2112 // Insertions are harmless. 2113 patch.length2 += diff_text.length(); 2114 start2 += diff_text.length(); 2115 patch.diffs.addLast(bigpatch.diffs.removeFirst()); 2116 empty = false; 2117 } else if (diff_type == Operation.DELETE && patch.diffs.size() == 1 2118 && patch.diffs.getFirst().operation == Operation.EQUAL 2119 && diff_text.length() > 2 * patch_size) { 2120 // This is a large deletion. Let it pass in one chunk. 2121 patch.length1 += diff_text.length(); 2122 start1 += diff_text.length(); 2123 empty = false; 2124 patch.diffs.add(new Diff(diff_type, diff_text)); 2125 bigpatch.diffs.removeFirst(); 2126 } else { 2127 // Deletion or equality. Only take as much as we can stomach. 2128 diff_text = diff_text.substring(0, Math.min(diff_text.length(), 2129 patch_size - patch.length1 - Patch_Margin)); 2130 patch.length1 += diff_text.length(); 2131 start1 += diff_text.length(); 2132 if (diff_type == Operation.EQUAL) { 2133 patch.length2 += diff_text.length(); 2134 start2 += diff_text.length(); 2135 } else { 2136 empty = false; 2137 } 2138 patch.diffs.add(new Diff(diff_type, diff_text)); 2139 if (diff_text.equals(bigpatch.diffs.getFirst().text)) { 2140 bigpatch.diffs.removeFirst(); 2141 } else { 2142 bigpatch.diffs.getFirst().text = bigpatch.diffs.getFirst().text 2143 .substring(diff_text.length()); 2144 } 2145 } 2146 } 2147 // Compute the head context for the next patch. 2148 precontext = diff_text2(patch.diffs); 2149 precontext = precontext.substring(Math.max(0, precontext.length() 2150 - Patch_Margin)); 2151 // Append the end context for this patch. 2152 if (diff_text1(bigpatch.diffs).length() > Patch_Margin) { 2153 postcontext = diff_text1(bigpatch.diffs).substring(0, Patch_Margin); 2154 } else { 2155 postcontext = diff_text1(bigpatch.diffs); 2156 } 2157 if (postcontext.length() != 0) { 2158 patch.length1 += postcontext.length(); 2159 patch.length2 += postcontext.length(); 2160 if (!patch.diffs.isEmpty() 2161 && patch.diffs.getLast().operation == Operation.EQUAL) { 2162 patch.diffs.getLast().text += postcontext; 2163 } else { 2164 patch.diffs.add(new Diff(Operation.EQUAL, postcontext)); 2165 } 2166 } 2167 if (!empty) { 2168 pointer.add(patch); 2169 } 2170 } 2171 bigpatch = pointer.hasNext() ? pointer.next() : null; 2172 } 2173 } 2174 2175 2176 /** 2177 * Take a list of patches and return a textual representation. 2178 * @param patches List of Patch objects. 2179 * @return Text representation of patches. 2180 */ 2181 public String patch_toText(List<Patch> patches) { 2182 StringBuilder text = new StringBuilder(); 2183 for (Patch aPatch : patches) { 2184 text.append(aPatch); 2185 } 2186 return text.toString(); 2187 } 2188 2189 2190 /** 2191 * Parse a textual representation of patches and return a List of Patch 2192 * objects. 2193 * @param textline Text representation of patches. 2194 * @return List of Patch objects. 2195 * @throws IllegalArgumentException If invalid input. 2196 */ 2197 public List<Patch> patch_fromText(String textline) 2198 throws IllegalArgumentException { 2199 List<Patch> patches = new LinkedList<Patch>(); 2200 if (textline.length() == 0) { 2201 return patches; 2202 } 2203 List<String> textList = Arrays.asList(textline.split("\n")); 2204 LinkedList<String> text = new LinkedList<String>(textList); 2205 Patch patch; 2206 Pattern patchHeader 2207 = Pattern.compile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); 2208 Matcher m; 2209 char sign; 2210 String line; 2211 while (!text.isEmpty()) { 2212 m = patchHeader.matcher(text.getFirst()); 2213 if (!m.matches()) { 2214 throw new IllegalArgumentException( 2215 "Invalid patch string: " + text.getFirst()); 2216 } 2217 patch = new Patch(); 2218 patches.add(patch); 2219 patch.start1 = Integer.parseInt(m.group(1)); 2220 if (m.group(2).length() == 0) { 2221 patch.start1--; 2222 patch.length1 = 1; 2223 } else if (m.group(2).equals("0")) { 2224 patch.length1 = 0; 2225 } else { 2226 patch.start1--; 2227 patch.length1 = Integer.parseInt(m.group(2)); 2228 } 2229 2230 patch.start2 = Integer.parseInt(m.group(3)); 2231 if (m.group(4).length() == 0) { 2232 patch.start2--; 2233 patch.length2 = 1; 2234 } else if (m.group(4).equals("0")) { 2235 patch.length2 = 0; 2236 } else { 2237 patch.start2--; 2238 patch.length2 = Integer.parseInt(m.group(4)); 2239 } 2240 text.removeFirst(); 2241 2242 while (!text.isEmpty()) { 2243 try { 2244 sign = text.getFirst().charAt(0); 2245 } catch (IndexOutOfBoundsException e) { 2246 // Blank line? Whatever. 2247 text.removeFirst(); 2248 continue; 2249 } 2250 line = text.getFirst().substring(1); 2251 line = line.replace("+", "%2B"); // decode would change all "+" to " " 2252 try { 2253 line = URLDecoder.decode(line, "UTF-8"); 2254 } catch (UnsupportedEncodingException e) { 2255 // Not likely on modern system. 2256 throw new Error("This system does not support UTF-8.", e); 2257 } catch (IllegalArgumentException e) { 2258 // Malformed URI sequence. 2259 throw new IllegalArgumentException( 2260 "Illegal escape in patch_fromText: " + line, e); 2261 } 2262 if (sign == '-') { 2263 // Deletion. 2264 patch.diffs.add(new Diff(Operation.DELETE, line)); 2265 } else if (sign == '+') { 2266 // Insertion. 2267 patch.diffs.add(new Diff(Operation.INSERT, line)); 2268 } else if (sign == ' ') { 2269 // Minor equality. 2270 patch.diffs.add(new Diff(Operation.EQUAL, line)); 2271 } else if (sign == '@') { 2272 // Start of next patch. 2273 break; 2274 } else { 2275 // WTF? 2276 throw new IllegalArgumentException( 2277 "Invalid patch mode '" + sign + "' in: " + line); 2278 } 2279 text.removeFirst(); 2280 } 2281 } 2282 return patches; 2283 } 2284 2285 2286 /** 2287 * Class representing one diff operation. 2288 */ 2289 public static class Diff { 2290 /** 2291 * One of: INSERT, DELETE or EQUAL. 2292 */ 2293 public Operation operation; 2294 /** 2295 * The text associated with this diff operation. 2296 */ 2297 public String text; 2298 2299 /** 2300 * Constructor. Initializes the diff with the provided values. 2301 * @param operation One of INSERT, DELETE or EQUAL. 2302 * @param text The text being applied. 2303 */ 2304 public Diff(Operation operation, String text) { 2305 // Construct a diff with the specified operation and text. 2306 this.operation = operation; 2307 this.text = text; 2308 } 2309 2310 2311 /** 2312 * Display a human-readable version of this Diff. 2313 * @return text version. 2314 */ 2315 public String toString() { 2316 String prettyText = this.text.replace('\n', '\u00b6'); 2317 return "Diff(" + this.operation + ",\"" + prettyText + "\")"; 2318 } 2319 2320 2321 /** 2322 * Is this Diff equivalent to another Diff? 2323 * @param d Another Diff to compare against. 2324 * @return true or false. 2325 */ 2326 public boolean equals(Object d) { 2327 try { 2328 return (((Diff) d).operation == this.operation) 2329 && (((Diff) d).text.equals(this.text)); 2330 } catch (ClassCastException e) { 2331 return false; 2332 } 2333 } 2334 } 2335 2336 2337 /** 2338 * Class representing one patch operation. 2339 */ 2340 public static class Patch { 2341 public LinkedList<Diff> diffs; 2342 public int start1; 2343 public int start2; 2344 public int length1; 2345 public int length2; 2346 2347 2348 /** 2349 * Constructor. Initializes with an empty list of diffs. 2350 */ 2351 public Patch() { 2352 this.diffs = new LinkedList<Diff>(); 2353 } 2354 2355 2356 /** 2357 * Emmulate GNU diff's format. 2358 * Header: @@ -382,8 +481,9 @@ 2359 * Indicies are printed as 1-based, not 0-based. 2360 * @return The GNU diff string. 2361 */ 2362 public String toString() { 2363 String coords1, coords2; 2364 if (this.length1 == 0) { 2365 coords1 = this.start1 + ",0"; 2366 } else if (this.length1 == 1) { 2367 coords1 = Integer.toString(this.start1 + 1); 2368 } else { 2369 coords1 = (this.start1 + 1) + "," + this.length1; 2370 } 2371 if (this.length2 == 0) { 2372 coords2 = this.start2 + ",0"; 2373 } else if (this.length2 == 1) { 2374 coords2 = Integer.toString(this.start2 + 1); 2375 } else { 2376 coords2 = (this.start2 + 1) + "," + this.length2; 2377 } 2378 StringBuilder text = new StringBuilder(); 2379 text.append("@@ -").append(coords1).append(" +").append(coords2) 2380 .append(" @@\n"); 2381 // Escape the body of the patch with %xx notation. 2382 for (Diff aDiff : this.diffs) { 2383 switch (aDiff.operation) { 2384 case INSERT: 2385 text.append('+'); 2386 break; 2387 case DELETE: 2388 text.append('-'); 2389 break; 2390 case EQUAL: 2391 text.append(' '); 2392 break; 2393 } 2394 try { 2395 text.append(URLEncoder.encode(aDiff.text, "UTF-8").replace('+', ' ')) 2396 .append("\n"); 2397 } catch (UnsupportedEncodingException e) { 2398 // Not likely on modern system. 2399 throw new Error("This system does not support UTF-8.", e); 2400 } 2401 } 2402 return unescapeForEncodeUriCompatability(text.toString()); 2403 } 2404 } 2405 2406 2407 /** 2408 * Unescape selected chars for compatability with JavaScript's encodeURI. 2409 * In speed critical applications this could be dropped since the 2410 * receiving application will certainly decode these fine. 2411 * Note that this function is case-sensitive. Thus "%3f" would not be 2412 * unescaped. But this is ok because it is only called with the output of 2413 * URLEncoder.encode which returns uppercase hex. 2414 * 2415 * Example: "%3F" -> "?", "%24" -> "$", etc. 2416 * 2417 * @param str The string to escape. 2418 * @return The escaped string. 2419 */ 2420 private static String unescapeForEncodeUriCompatability(String str) { 2421 return str.replace("%21", "!").replace("%7E", "~") 2422 .replace("%27", "'").replace("%28", "(").replace("%29", ")") 2423 .replace("%3B", ";").replace("%2F", "/").replace("%3F", "?") 2424 .replace("%3A", ":").replace("%40", "@").replace("%26", "&") 2425 .replace("%3D", "=").replace("%2B", "+").replace("%24", "$") 2426 .replace("%2C", ",").replace("%23", "#"); 2427 } 2428 } 2429