1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.io; 18 19 import java.io.File; 20 import java.util.ArrayList; 21 import java.util.Collection; 22 import java.util.Iterator; 23 import java.util.Stack; 24 25 /** 26 * General filename and filepath manipulation utilities. 27 * <p> 28 * When dealing with filenames you can hit problems when moving from a Windows 29 * based development machine to a Unix based production machine. 30 * This class aims to help avoid those problems. 31 * <p> 32 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by 33 * using JDK {@link java.io.File File} objects and the two argument constructor 34 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}. 35 * <p> 36 * Most methods on this class are designed to work the same on both Unix and Windows. 37 * Those that don't include 'System', 'Unix' or 'Windows' in their name. 38 * <p> 39 * Most methods recognise both separators (forward and back), and both 40 * sets of prefixes. See the javadoc of each method for details. 
41 * <p> 42 * This class defines six components within a filename 43 * (example C:\dev\project\file.txt): 44 * <ul> 45 * <li>the prefix - C:\</li> 46 * <li>the path - dev\project\</li> 47 * <li>the full path - C:\dev\project\</li> 48 * <li>the name - file.txt</li> 49 * <li>the base name - file</li> 50 * <li>the extension - txt</li> 51 * </ul> 52 * Note that this class works best if directory filenames end with a separator. 53 * If you omit the last separator, it is impossible to determine if the filename 54 * corresponds to a file or a directory. As a result, we have chosen to say 55 * it corresponds to a file. 56 * <p> 57 * This class only supports Unix and Windows style names. 58 * Prefixes are matched as follows: 59 * <pre> 60 * Windows: 61 * a\b\c.txt --> "" --> relative 62 * \a\b\c.txt --> "\" --> current drive absolute 63 * C:a\b\c.txt --> "C:" --> drive relative 64 * C:\a\b\c.txt --> "C:\" --> absolute 65 * \\server\a\b\c.txt --> "\\server\" --> UNC 66 * 67 * Unix: 68 * a/b/c.txt --> "" --> relative 69 * /a/b/c.txt --> "/" --> absolute 70 * ~/a/b/c.txt --> "~/" --> current user 71 * ~ --> "~/" --> current user (slash added) 72 * ~user/a/b/c.txt --> "~user/" --> named user 73 * ~user --> "~user/" --> named user (slash added) 74 * </pre> 75 * Both prefix styles are matched always, irrespective of the machine that you are 76 * currently running on. 77 * <p> 78 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils. 79 * 80 * @author <a href="mailto:burton (at) relativity.yi.org">Kevin A. 
Burton</A> 81 * @author <a href="mailto:sanders (at) apache.org">Scott Sanders</a> 82 * @author <a href="mailto:dlr (at) finemaltcoding.com">Daniel Rall</a> 83 * @author <a href="mailto:Christoph.Reck (at) dlr.de">Christoph.Reck</a> 84 * @author <a href="mailto:peter (at) apache.org">Peter Donald</a> 85 * @author <a href="mailto:jefft (at) apache.org">Jeff Turner</a> 86 * @author Matthew Hawthorne 87 * @author Martin Cooper 88 * @author <a href="mailto:jeremias (at) apache.org">Jeremias Maerki</a> 89 * @author Stephen Colebourne 90 * @version $Id: FilenameUtils.java 609870 2008-01-08 04:46:26Z niallp $ 91 * @since Commons IO 1.1 92 */ 93 public class FilenameUtils { 94 95 /** 96 * The extension separator character. 97 * @since Commons IO 1.4 98 */ 99 public static final char EXTENSION_SEPARATOR = '.'; 100 101 /** 102 * The extension separator String. 103 * @since Commons IO 1.4 104 */ 105 public static final String EXTENSION_SEPARATOR_STR = (new Character(EXTENSION_SEPARATOR)).toString(); 106 107 /** 108 * The Unix separator character. 109 */ 110 private static final char UNIX_SEPARATOR = '/'; 111 112 /** 113 * The Windows separator character. 114 */ 115 private static final char WINDOWS_SEPARATOR = '\\'; 116 117 /** 118 * The system separator character. 119 */ 120 private static final char SYSTEM_SEPARATOR = File.separatorChar; 121 122 /** 123 * The separator character that is the opposite of the system separator. 124 */ 125 private static final char OTHER_SEPARATOR; 126 static { 127 if (isSystemWindows()) { 128 OTHER_SEPARATOR = UNIX_SEPARATOR; 129 } else { 130 OTHER_SEPARATOR = WINDOWS_SEPARATOR; 131 } 132 } 133 134 /** 135 * Instances should NOT be constructed in standard programming. 136 */ 137 public FilenameUtils() { 138 super(); 139 } 140 141 //----------------------------------------------------------------------- 142 /** 143 * Determines if Windows file system is in use. 
144 * 145 * @return true if the system is Windows 146 */ 147 static boolean isSystemWindows() { 148 return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR; 149 } 150 151 //----------------------------------------------------------------------- 152 /** 153 * Checks if the character is a separator. 154 * 155 * @param ch the character to check 156 * @return true if it is a separator character 157 */ 158 private static boolean isSeparator(char ch) { 159 return (ch == UNIX_SEPARATOR) || (ch == WINDOWS_SEPARATOR); 160 } 161 162 //----------------------------------------------------------------------- 163 /** 164 * Normalizes a path, removing double and single dot path steps. 165 * <p> 166 * This method normalizes a path to a standard format. 167 * The input may contain separators in either Unix or Windows format. 168 * The output will contain separators in the format of the system. 169 * <p> 170 * A trailing slash will be retained. 171 * A double slash will be merged to a single slash (but UNC names are handled). 172 * A single dot path segment will be removed. 173 * A double dot will cause that path segment and the one before to be removed. 174 * If the double dot has no parent path segment to work with, <code>null</code> 175 * is returned. 176 * <p> 177 * The output will be the same on both Unix and Windows except 178 * for the separator character. 179 * <pre> 180 * /foo// --> /foo/ 181 * /foo/./ --> /foo/ 182 * /foo/../bar --> /bar 183 * /foo/../bar/ --> /bar/ 184 * /foo/../bar/../baz --> /baz 185 * //foo//./bar --> /foo/bar 186 * /../ --> null 187 * ../foo --> null 188 * foo/bar/.. 
--> foo/ 189 * foo/../../bar --> null 190 * foo/../bar --> bar 191 * //server/foo/../bar --> //server/bar 192 * //server/../bar --> null 193 * C:\foo\..\bar --> C:\bar 194 * C:\..\bar --> null 195 * ~/foo/../bar/ --> ~/bar/ 196 * ~/../bar --> null 197 * </pre> 198 * (Note the file separator returned will be correct for Windows/Unix) 199 * 200 * @param filename the filename to normalize, null returns null 201 * @return the normalized filename, or null if invalid 202 */ 203 public static String normalize(String filename) { 204 return doNormalize(filename, true); 205 } 206 207 //----------------------------------------------------------------------- 208 /** 209 * Normalizes a path, removing double and single dot path steps, 210 * and removing any final directory separator. 211 * <p> 212 * This method normalizes a path to a standard format. 213 * The input may contain separators in either Unix or Windows format. 214 * The output will contain separators in the format of the system. 215 * <p> 216 * A trailing slash will be removed. 217 * A double slash will be merged to a single slash (but UNC names are handled). 218 * A single dot path segment will be removed. 219 * A double dot will cause that path segment and the one before to be removed. 220 * If the double dot has no parent path segment to work with, <code>null</code> 221 * is returned. 222 * <p> 223 * The output will be the same on both Unix and Windows except 224 * for the separator character. 225 * <pre> 226 * /foo// --> /foo 227 * /foo/./ --> /foo 228 * /foo/../bar --> /bar 229 * /foo/../bar/ --> /bar 230 * /foo/../bar/../baz --> /baz 231 * //foo//./bar --> /foo/bar 232 * /../ --> null 233 * ../foo --> null 234 * foo/bar/.. 
--> foo 235 * foo/../../bar --> null 236 * foo/../bar --> bar 237 * //server/foo/../bar --> //server/bar 238 * //server/../bar --> null 239 * C:\foo\..\bar --> C:\bar 240 * C:\..\bar --> null 241 * ~/foo/../bar/ --> ~/bar 242 * ~/../bar --> null 243 * </pre> 244 * (Note the file separator returned will be correct for Windows/Unix) 245 * 246 * @param filename the filename to normalize, null returns null 247 * @return the normalized filename, or null if invalid 248 */ 249 public static String normalizeNoEndSeparator(String filename) { 250 return doNormalize(filename, false); 251 } 252 253 /** 254 * Internal method to perform the normalization. 255 * 256 * @param filename the filename 257 * @param keepSeparator true to keep the final separator 258 * @return the normalized filename 259 */ 260 private static String doNormalize(String filename, boolean keepSeparator) { 261 if (filename == null) { 262 return null; 263 } 264 int size = filename.length(); 265 if (size == 0) { 266 return filename; 267 } 268 int prefix = getPrefixLength(filename); 269 if (prefix < 0) { 270 return null; 271 } 272 273 char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy 274 filename.getChars(0, filename.length(), array, 0); 275 276 // fix separators throughout 277 for (int i = 0; i < array.length; i++) { 278 if (array[i] == OTHER_SEPARATOR) { 279 array[i] = SYSTEM_SEPARATOR; 280 } 281 } 282 283 // add extra separator on the end to simplify code below 284 boolean lastIsDirectory = true; 285 if (array[size - 1] != SYSTEM_SEPARATOR) { 286 array[size++] = SYSTEM_SEPARATOR; 287 lastIsDirectory = false; 288 } 289 290 // adjoining slashes 291 for (int i = prefix + 1; i < size; i++) { 292 if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == SYSTEM_SEPARATOR) { 293 System.arraycopy(array, i, array, i - 1, size - i); 294 size--; 295 i--; 296 } 297 } 298 299 // dot slash 300 for (int i = prefix + 1; i < size; i++) { 301 if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' 
&& 302 (i == prefix + 1 || array[i - 2] == SYSTEM_SEPARATOR)) { 303 if (i == size - 1) { 304 lastIsDirectory = true; 305 } 306 System.arraycopy(array, i + 1, array, i - 1, size - i); 307 size -=2; 308 i--; 309 } 310 } 311 312 // double dot slash 313 outer: 314 for (int i = prefix + 2; i < size; i++) { 315 if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' && array[i - 2] == '.' && 316 (i == prefix + 2 || array[i - 3] == SYSTEM_SEPARATOR)) { 317 if (i == prefix + 2) { 318 return null; 319 } 320 if (i == size - 1) { 321 lastIsDirectory = true; 322 } 323 int j; 324 for (j = i - 4 ; j >= prefix; j--) { 325 if (array[j] == SYSTEM_SEPARATOR) { 326 // remove b/../ from a/b/../c 327 System.arraycopy(array, i + 1, array, j + 1, size - i); 328 size -= (i - j); 329 i = j + 1; 330 continue outer; 331 } 332 } 333 // remove a/../ from a/../c 334 System.arraycopy(array, i + 1, array, prefix, size - i); 335 size -= (i + 1 - prefix); 336 i = prefix + 1; 337 } 338 } 339 340 if (size <= 0) { // should never be less than 0 341 return ""; 342 } 343 if (size <= prefix) { // should never be less than prefix 344 return new String(array, 0, size); 345 } 346 if (lastIsDirectory && keepSeparator) { 347 return new String(array, 0, size); // keep trailing separator 348 } 349 return new String(array, 0, size - 1); // lose trailing separator 350 } 351 352 //----------------------------------------------------------------------- 353 /** 354 * Concatenates a filename to a base path using normal command line style rules. 355 * <p> 356 * The effect is equivalent to resultant directory after changing 357 * directory to the first argument, followed by changing directory to 358 * the second argument. 359 * <p> 360 * The first argument is the base path, the second is the path to concatenate. 361 * The returned path is always normalized via {@link #normalize(String)}, 362 * thus <code>..</code> is handled. 
* <p>
     * If <code>fullFilenameToAdd</code> is absolute (has an absolute prefix), then
     * it will be normalized and returned.
     * Otherwise, the paths will be joined, normalized and returned.
     * <p>
     * The output will be the same on both Unix and Windows except
     * for the separator character.
     * <pre>
     * /foo/ + bar          -->   /foo/bar
     * /foo + bar           -->   /foo/bar
     * /foo + /bar          -->   /bar
     * /foo + C:/bar        -->   C:/bar
     * /foo + C:bar         -->   C:bar (*)
     * /foo/a/ + ../bar     -->   /foo/bar
     * /foo/ + ../../bar    -->   null
     * /foo/ + /bar         -->   /bar
     * /foo/.. + /bar       -->   /bar
     * /foo + bar/c.txt     -->   /foo/bar/c.txt
     * /foo/c.txt + bar     -->   /foo/c.txt/bar (!)
     * </pre>
     * (*) Note that the Windows relative drive prefix is unreliable when
     * used with this method.
     * (!) Note that the first parameter must be a path. If it ends with a name, then
     * the name will be built into the concatenated path. If this might be a problem,
     * use {@link #getFullPath(String)} on the base path argument.
388 * 389 * @param basePath the base path to attach to, always treated as a path 390 * @param fullFilenameToAdd the filename (or path) to attach to the base 391 * @return the concatenated path, or null if invalid 392 */ 393 public static String concat(String basePath, String fullFilenameToAdd) { 394 int prefix = getPrefixLength(fullFilenameToAdd); 395 if (prefix < 0) { 396 return null; 397 } 398 if (prefix > 0) { 399 return normalize(fullFilenameToAdd); 400 } 401 if (basePath == null) { 402 return null; 403 } 404 int len = basePath.length(); 405 if (len == 0) { 406 return normalize(fullFilenameToAdd); 407 } 408 char ch = basePath.charAt(len - 1); 409 if (isSeparator(ch)) { 410 return normalize(basePath + fullFilenameToAdd); 411 } else { 412 return normalize(basePath + '/' + fullFilenameToAdd); 413 } 414 } 415 416 //----------------------------------------------------------------------- 417 /** 418 * Converts all separators to the Unix separator of forward slash. 419 * 420 * @param path the path to be changed, null ignored 421 * @return the updated path 422 */ 423 public static String separatorsToUnix(String path) { 424 if (path == null || path.indexOf(WINDOWS_SEPARATOR) == -1) { 425 return path; 426 } 427 return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR); 428 } 429 430 /** 431 * Converts all separators to the Windows separator of backslash. 432 * 433 * @param path the path to be changed, null ignored 434 * @return the updated path 435 */ 436 public static String separatorsToWindows(String path) { 437 if (path == null || path.indexOf(UNIX_SEPARATOR) == -1) { 438 return path; 439 } 440 return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR); 441 } 442 443 /** 444 * Converts all separators to the system separator. 
445 * 446 * @param path the path to be changed, null ignored 447 * @return the updated path 448 */ 449 public static String separatorsToSystem(String path) { 450 if (path == null) { 451 return null; 452 } 453 if (isSystemWindows()) { 454 return separatorsToWindows(path); 455 } else { 456 return separatorsToUnix(path); 457 } 458 } 459 460 //----------------------------------------------------------------------- 461 /** 462 * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>. 463 * <p> 464 * This method will handle a file in either Unix or Windows format. 465 * <p> 466 * The prefix length includes the first slash in the full filename 467 * if applicable. Thus, it is possible that the length returned is greater 468 * than the length of the input string. 469 * <pre> 470 * Windows: 471 * a\b\c.txt --> "" --> relative 472 * \a\b\c.txt --> "\" --> current drive absolute 473 * C:a\b\c.txt --> "C:" --> drive relative 474 * C:\a\b\c.txt --> "C:\" --> absolute 475 * \\server\a\b\c.txt --> "\\server\" --> UNC 476 * 477 * Unix: 478 * a/b/c.txt --> "" --> relative 479 * /a/b/c.txt --> "/" --> absolute 480 * ~/a/b/c.txt --> "~/" --> current user 481 * ~ --> "~/" --> current user (slash added) 482 * ~user/a/b/c.txt --> "~user/" --> named user 483 * ~user --> "~user/" --> named user (slash added) 484 * </pre> 485 * <p> 486 * The output will be the same irrespective of the machine that the code is running on. 487 * ie. both Unix and Windows prefixes are matched regardless. 
488 * 489 * @param filename the filename to find the prefix in, null returns -1 490 * @return the length of the prefix, -1 if invalid or null 491 */ 492 public static int getPrefixLength(String filename) { 493 if (filename == null) { 494 return -1; 495 } 496 int len = filename.length(); 497 if (len == 0) { 498 return 0; 499 } 500 char ch0 = filename.charAt(0); 501 if (ch0 == ':') { 502 return -1; 503 } 504 if (len == 1) { 505 if (ch0 == '~') { 506 return 2; // return a length greater than the input 507 } 508 return (isSeparator(ch0) ? 1 : 0); 509 } else { 510 if (ch0 == '~') { 511 int posUnix = filename.indexOf(UNIX_SEPARATOR, 1); 512 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1); 513 if (posUnix == -1 && posWin == -1) { 514 return len + 1; // return a length greater than the input 515 } 516 posUnix = (posUnix == -1 ? posWin : posUnix); 517 posWin = (posWin == -1 ? posUnix : posWin); 518 return Math.min(posUnix, posWin) + 1; 519 } 520 char ch1 = filename.charAt(1); 521 if (ch1 == ':') { 522 ch0 = Character.toUpperCase(ch0); 523 if (ch0 >= 'A' && ch0 <= 'Z') { 524 if (len == 2 || isSeparator(filename.charAt(2)) == false) { 525 return 2; 526 } 527 return 3; 528 } 529 return -1; 530 531 } else if (isSeparator(ch0) && isSeparator(ch1)) { 532 int posUnix = filename.indexOf(UNIX_SEPARATOR, 2); 533 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2); 534 if ((posUnix == -1 && posWin == -1) || posUnix == 2 || posWin == 2) { 535 return -1; 536 } 537 posUnix = (posUnix == -1 ? posWin : posUnix); 538 posWin = (posWin == -1 ? posUnix : posWin); 539 return Math.min(posUnix, posWin) + 1; 540 } else { 541 return (isSeparator(ch0) ? 1 : 0); 542 } 543 } 544 } 545 546 /** 547 * Returns the index of the last directory separator character. 548 * <p> 549 * This method will handle a file in either Unix or Windows format. 550 * The position of the last forward or backslash is returned. 
551 * <p> 552 * The output will be the same irrespective of the machine that the code is running on. 553 * 554 * @param filename the filename to find the last path separator in, null returns -1 555 * @return the index of the last separator character, or -1 if there 556 * is no such character 557 */ 558 public static int indexOfLastSeparator(String filename) { 559 if (filename == null) { 560 return -1; 561 } 562 int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR); 563 int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR); 564 return Math.max(lastUnixPos, lastWindowsPos); 565 } 566 567 /** 568 * Returns the index of the last extension separator character, which is a dot. 569 * <p> 570 * This method also checks that there is no directory separator after the last dot. 571 * To do this it uses {@link #indexOfLastSeparator(String)} which will 572 * handle a file in either Unix or Windows format. 573 * <p> 574 * The output will be the same irrespective of the machine that the code is running on. 575 * 576 * @param filename the filename to find the last path separator in, null returns -1 577 * @return the index of the last separator character, or -1 if there 578 * is no such character 579 */ 580 public static int indexOfExtension(String filename) { 581 if (filename == null) { 582 return -1; 583 } 584 int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR); 585 int lastSeparator = indexOfLastSeparator(filename); 586 return (lastSeparator > extensionPos ? -1 : extensionPos); 587 } 588 589 //----------------------------------------------------------------------- 590 /** 591 * Gets the prefix from a full filename, such as <code>C:/</code> 592 * or <code>~/</code>. 593 * <p> 594 * This method will handle a file in either Unix or Windows format. 595 * The prefix includes the first slash in the full filename where applicable. 
596 * <pre> 597 * Windows: 598 * a\b\c.txt --> "" --> relative 599 * \a\b\c.txt --> "\" --> current drive absolute 600 * C:a\b\c.txt --> "C:" --> drive relative 601 * C:\a\b\c.txt --> "C:\" --> absolute 602 * \\server\a\b\c.txt --> "\\server\" --> UNC 603 * 604 * Unix: 605 * a/b/c.txt --> "" --> relative 606 * /a/b/c.txt --> "/" --> absolute 607 * ~/a/b/c.txt --> "~/" --> current user 608 * ~ --> "~/" --> current user (slash added) 609 * ~user/a/b/c.txt --> "~user/" --> named user 610 * ~user --> "~user/" --> named user (slash added) 611 * </pre> 612 * <p> 613 * The output will be the same irrespective of the machine that the code is running on. 614 * ie. both Unix and Windows prefixes are matched regardless. 615 * 616 * @param filename the filename to query, null returns null 617 * @return the prefix of the file, null if invalid 618 */ 619 public static String getPrefix(String filename) { 620 if (filename == null) { 621 return null; 622 } 623 int len = getPrefixLength(filename); 624 if (len < 0) { 625 return null; 626 } 627 if (len > filename.length()) { 628 return filename + UNIX_SEPARATOR; // we know this only happens for unix 629 } 630 return filename.substring(0, len); 631 } 632 633 /** 634 * Gets the path from a full filename, which excludes the prefix. 635 * <p> 636 * This method will handle a file in either Unix or Windows format. 637 * The method is entirely text based, and returns the text before and 638 * including the last forward or backslash. 639 * <pre> 640 * C:\a\b\c.txt --> a\b\ 641 * ~/a/b/c.txt --> a/b/ 642 * a.txt --> "" 643 * a/b/c --> a/b/ 644 * a/b/c/ --> a/b/c/ 645 * </pre> 646 * <p> 647 * The output will be the same irrespective of the machine that the code is running on. 648 * <p> 649 * This method drops the prefix from the result. 650 * See {@link #getFullPath(String)} for the method that retains the prefix. 
651 * 652 * @param filename the filename to query, null returns null 653 * @return the path of the file, an empty string if none exists, null if invalid 654 */ 655 public static String getPath(String filename) { 656 return doGetPath(filename, 1); 657 } 658 659 /** 660 * Gets the path from a full filename, which excludes the prefix, and 661 * also excluding the final directory separator. 662 * <p> 663 * This method will handle a file in either Unix or Windows format. 664 * The method is entirely text based, and returns the text before the 665 * last forward or backslash. 666 * <pre> 667 * C:\a\b\c.txt --> a\b 668 * ~/a/b/c.txt --> a/b 669 * a.txt --> "" 670 * a/b/c --> a/b 671 * a/b/c/ --> a/b/c 672 * </pre> 673 * <p> 674 * The output will be the same irrespective of the machine that the code is running on. 675 * <p> 676 * This method drops the prefix from the result. 677 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. 678 * 679 * @param filename the filename to query, null returns null 680 * @return the path of the file, an empty string if none exists, null if invalid 681 */ 682 public static String getPathNoEndSeparator(String filename) { 683 return doGetPath(filename, 0); 684 } 685 686 /** 687 * Does the work of getting the path. 688 * 689 * @param filename the filename 690 * @param separatorAdd 0 to omit the end separator, 1 to return it 691 * @return the path 692 */ 693 private static String doGetPath(String filename, int separatorAdd) { 694 if (filename == null) { 695 return null; 696 } 697 int prefix = getPrefixLength(filename); 698 if (prefix < 0) { 699 return null; 700 } 701 int index = indexOfLastSeparator(filename); 702 if (prefix >= filename.length() || index < 0) { 703 return ""; 704 } 705 return filename.substring(prefix, index + separatorAdd); 706 } 707 708 /** 709 * Gets the full path from a full filename, which is the prefix + path. 
710 * <p> 711 * This method will handle a file in either Unix or Windows format. 712 * The method is entirely text based, and returns the text before and 713 * including the last forward or backslash. 714 * <pre> 715 * C:\a\b\c.txt --> C:\a\b\ 716 * ~/a/b/c.txt --> ~/a/b/ 717 * a.txt --> "" 718 * a/b/c --> a/b/ 719 * a/b/c/ --> a/b/c/ 720 * C: --> C: 721 * C:\ --> C:\ 722 * ~ --> ~/ 723 * ~/ --> ~/ 724 * ~user --> ~user/ 725 * ~user/ --> ~user/ 726 * </pre> 727 * <p> 728 * The output will be the same irrespective of the machine that the code is running on. 729 * 730 * @param filename the filename to query, null returns null 731 * @return the path of the file, an empty string if none exists, null if invalid 732 */ 733 public static String getFullPath(String filename) { 734 return doGetFullPath(filename, true); 735 } 736 737 /** 738 * Gets the full path from a full filename, which is the prefix + path, 739 * and also excluding the final directory separator. 740 * <p> 741 * This method will handle a file in either Unix or Windows format. 742 * The method is entirely text based, and returns the text before the 743 * last forward or backslash. 744 * <pre> 745 * C:\a\b\c.txt --> C:\a\b 746 * ~/a/b/c.txt --> ~/a/b 747 * a.txt --> "" 748 * a/b/c --> a/b 749 * a/b/c/ --> a/b/c 750 * C: --> C: 751 * C:\ --> C:\ 752 * ~ --> ~ 753 * ~/ --> ~ 754 * ~user --> ~user 755 * ~user/ --> ~user 756 * </pre> 757 * <p> 758 * The output will be the same irrespective of the machine that the code is running on. 759 * 760 * @param filename the filename to query, null returns null 761 * @return the path of the file, an empty string if none exists, null if invalid 762 */ 763 public static String getFullPathNoEndSeparator(String filename) { 764 return doGetFullPath(filename, false); 765 } 766 767 /** 768 * Does the work of getting the path. 
769 * 770 * @param filename the filename 771 * @param includeSeparator true to include the end separator 772 * @return the path 773 */ 774 private static String doGetFullPath(String filename, boolean includeSeparator) { 775 if (filename == null) { 776 return null; 777 } 778 int prefix = getPrefixLength(filename); 779 if (prefix < 0) { 780 return null; 781 } 782 if (prefix >= filename.length()) { 783 if (includeSeparator) { 784 return getPrefix(filename); // add end slash if necessary 785 } else { 786 return filename; 787 } 788 } 789 int index = indexOfLastSeparator(filename); 790 if (index < 0) { 791 return filename.substring(0, prefix); 792 } 793 int end = index + (includeSeparator ? 1 : 0); 794 return filename.substring(0, end); 795 } 796 797 /** 798 * Gets the name minus the path from a full filename. 799 * <p> 800 * This method will handle a file in either Unix or Windows format. 801 * The text after the last forward or backslash is returned. 802 * <pre> 803 * a/b/c.txt --> c.txt 804 * a.txt --> a.txt 805 * a/b/c --> c 806 * a/b/c/ --> "" 807 * </pre> 808 * <p> 809 * The output will be the same irrespective of the machine that the code is running on. 810 * 811 * @param filename the filename to query, null returns null 812 * @return the name of the file without the path, or an empty string if none exists 813 */ 814 public static String getName(String filename) { 815 if (filename == null) { 816 return null; 817 } 818 int index = indexOfLastSeparator(filename); 819 return filename.substring(index + 1); 820 } 821 822 /** 823 * Gets the base name, minus the full path and extension, from a full filename. 824 * <p> 825 * This method will handle a file in either Unix or Windows format. 826 * The text after the last forward or backslash and before the last dot is returned. 
827 * <pre> 828 * a/b/c.txt --> c 829 * a.txt --> a 830 * a/b/c --> c 831 * a/b/c/ --> "" 832 * </pre> 833 * <p> 834 * The output will be the same irrespective of the machine that the code is running on. 835 * 836 * @param filename the filename to query, null returns null 837 * @return the name of the file without the path, or an empty string if none exists 838 */ 839 public static String getBaseName(String filename) { 840 return removeExtension(getName(filename)); 841 } 842 843 /** 844 * Gets the extension of a filename. 845 * <p> 846 * This method returns the textual part of the filename after the last dot. 847 * There must be no directory separator after the dot. 848 * <pre> 849 * foo.txt --> "txt" 850 * a/b/c.jpg --> "jpg" 851 * a/b.txt/c --> "" 852 * a/b/c --> "" 853 * </pre> 854 * <p> 855 * The output will be the same irrespective of the machine that the code is running on. 856 * 857 * @param filename the filename to retrieve the extension of. 858 * @return the extension of the file or an empty string if none exists. 859 */ 860 public static String getExtension(String filename) { 861 if (filename == null) { 862 return null; 863 } 864 int index = indexOfExtension(filename); 865 if (index == -1) { 866 return ""; 867 } else { 868 return filename.substring(index + 1); 869 } 870 } 871 872 //----------------------------------------------------------------------- 873 /** 874 * Removes the extension from a filename. 875 * <p> 876 * This method returns the textual part of the filename before the last dot. 877 * There must be no directory separator after the dot. 878 * <pre> 879 * foo.txt --> foo 880 * a\b\c.jpg --> a\b\c 881 * a\b\c --> a\b\c 882 * a.b\c --> a.b\c 883 * </pre> 884 * <p> 885 * The output will be the same irrespective of the machine that the code is running on. 
886 * 887 * @param filename the filename to query, null returns null 888 * @return the filename minus the extension 889 */ 890 public static String removeExtension(String filename) { 891 if (filename == null) { 892 return null; 893 } 894 int index = indexOfExtension(filename); 895 if (index == -1) { 896 return filename; 897 } else { 898 return filename.substring(0, index); 899 } 900 } 901 902 //----------------------------------------------------------------------- 903 /** 904 * Checks whether two filenames are equal exactly. 905 * <p> 906 * No processing is performed on the filenames other than comparison, 907 * thus this is merely a null-safe case-sensitive equals. 908 * 909 * @param filename1 the first filename to query, may be null 910 * @param filename2 the second filename to query, may be null 911 * @return true if the filenames are equal, null equals null 912 * @see IOCase#SENSITIVE 913 */ 914 public static boolean equals(String filename1, String filename2) { 915 return equals(filename1, filename2, false, IOCase.SENSITIVE); 916 } 917 918 /** 919 * Checks whether two filenames are equal using the case rules of the system. 920 * <p> 921 * No processing is performed on the filenames other than comparison. 922 * The check is case-sensitive on Unix and case-insensitive on Windows. 923 * 924 * @param filename1 the first filename to query, may be null 925 * @param filename2 the second filename to query, may be null 926 * @return true if the filenames are equal, null equals null 927 * @see IOCase#SYSTEM 928 */ 929 public static boolean equalsOnSystem(String filename1, String filename2) { 930 return equals(filename1, filename2, false, IOCase.SYSTEM); 931 } 932 933 //----------------------------------------------------------------------- 934 /** 935 * Checks whether two filenames are equal after both have been normalized. 936 * <p> 937 * Both filenames are first passed to {@link #normalize(String)}. 938 * The check is then performed in a case-sensitive manner. 
939 * 940 * @param filename1 the first filename to query, may be null 941 * @param filename2 the second filename to query, may be null 942 * @return true if the filenames are equal, null equals null 943 * @see IOCase#SENSITIVE 944 */ 945 public static boolean equalsNormalized(String filename1, String filename2) { 946 return equals(filename1, filename2, true, IOCase.SENSITIVE); 947 } 948 949 /** 950 * Checks whether two filenames are equal after both have been normalized 951 * and using the case rules of the system. 952 * <p> 953 * Both filenames are first passed to {@link #normalize(String)}. 954 * The check is then performed case-sensitive on Unix and 955 * case-insensitive on Windows. 956 * 957 * @param filename1 the first filename to query, may be null 958 * @param filename2 the second filename to query, may be null 959 * @return true if the filenames are equal, null equals null 960 * @see IOCase#SYSTEM 961 */ 962 public static boolean equalsNormalizedOnSystem(String filename1, String filename2) { 963 return equals(filename1, filename2, true, IOCase.SYSTEM); 964 } 965 966 /** 967 * Checks whether two filenames are equal, optionally normalizing and providing 968 * control over the case-sensitivity. 
969 * 970 * @param filename1 the first filename to query, may be null 971 * @param filename2 the second filename to query, may be null 972 * @param normalized whether to normalize the filenames 973 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 974 * @return true if the filenames are equal, null equals null 975 * @since Commons IO 1.3 976 */ 977 public static boolean equals( 978 String filename1, String filename2, 979 boolean normalized, IOCase caseSensitivity) { 980 981 if (filename1 == null || filename2 == null) { 982 return filename1 == filename2; 983 } 984 if (normalized) { 985 filename1 = normalize(filename1); 986 filename2 = normalize(filename2); 987 if (filename1 == null || filename2 == null) { 988 throw new NullPointerException( 989 "Error normalizing one or both of the file names"); 990 } 991 } 992 if (caseSensitivity == null) { 993 caseSensitivity = IOCase.SENSITIVE; 994 } 995 return caseSensitivity.checkEquals(filename1, filename2); 996 } 997 998 //----------------------------------------------------------------------- 999 /** 1000 * Checks whether the extension of the filename is that specified. 1001 * <p> 1002 * This method obtains the extension as the textual part of the filename 1003 * after the last dot. There must be no directory separator after the dot. 1004 * The extension check is case-sensitive on all platforms. 
1005 * 1006 * @param filename the filename to query, null returns false 1007 * @param extension the extension to check for, null or empty checks for no extension 1008 * @return true if the filename has the specified extension 1009 */ 1010 public static boolean isExtension(String filename, String extension) { 1011 if (filename == null) { 1012 return false; 1013 } 1014 if (extension == null || extension.length() == 0) { 1015 return (indexOfExtension(filename) == -1); 1016 } 1017 String fileExt = getExtension(filename); 1018 return fileExt.equals(extension); 1019 } 1020 1021 /** 1022 * Checks whether the extension of the filename is one of those specified. 1023 * <p> 1024 * This method obtains the extension as the textual part of the filename 1025 * after the last dot. There must be no directory separator after the dot. 1026 * The extension check is case-sensitive on all platforms. 1027 * 1028 * @param filename the filename to query, null returns false 1029 * @param extensions the extensions to check for, null checks for no extension 1030 * @return true if the filename is one of the extensions 1031 */ 1032 public static boolean isExtension(String filename, String[] extensions) { 1033 if (filename == null) { 1034 return false; 1035 } 1036 if (extensions == null || extensions.length == 0) { 1037 return (indexOfExtension(filename) == -1); 1038 } 1039 String fileExt = getExtension(filename); 1040 for (int i = 0; i < extensions.length; i++) { 1041 if (fileExt.equals(extensions[i])) { 1042 return true; 1043 } 1044 } 1045 return false; 1046 } 1047 1048 /** 1049 * Checks whether the extension of the filename is one of those specified. 1050 * <p> 1051 * This method obtains the extension as the textual part of the filename 1052 * after the last dot. There must be no directory separator after the dot. 1053 * The extension check is case-sensitive on all platforms. 
1054 * 1055 * @param filename the filename to query, null returns false 1056 * @param extensions the extensions to check for, null checks for no extension 1057 * @return true if the filename is one of the extensions 1058 */ 1059 public static boolean isExtension(String filename, Collection<String> extensions) { 1060 if (filename == null) { 1061 return false; 1062 } 1063 if (extensions == null || extensions.isEmpty()) { 1064 return (indexOfExtension(filename) == -1); 1065 } 1066 String fileExt = getExtension(filename); 1067 for (Iterator<String> it = extensions.iterator(); it.hasNext();) { 1068 if (fileExt.equals(it.next())) { 1069 return true; 1070 } 1071 } 1072 return false; 1073 } 1074 1075 //----------------------------------------------------------------------- 1076 /** 1077 * Checks a filename to see if it matches the specified wildcard matcher, 1078 * always testing case-sensitive. 1079 * <p> 1080 * The wildcard matcher uses the characters '?' and '*' to represent a 1081 * single or multiple wildcard characters. 1082 * This is the same as often found on Dos/Unix command lines. 1083 * The check is case-sensitive always. 1084 * <pre> 1085 * wildcardMatch("c.txt", "*.txt") --> true 1086 * wildcardMatch("c.txt", "*.jpg") --> false 1087 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1088 * wildcardMatch("c.txt", "*.???") --> true 1089 * wildcardMatch("c.txt", "*.????") --> false 1090 * </pre> 1091 * 1092 * @param filename the filename to match on 1093 * @param wildcardMatcher the wildcard string to match against 1094 * @return true if the filename matches the wilcard string 1095 * @see IOCase#SENSITIVE 1096 */ 1097 public static boolean wildcardMatch(String filename, String wildcardMatcher) { 1098 return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE); 1099 } 1100 1101 /** 1102 * Checks a filename to see if it matches the specified wildcard matcher 1103 * using the case rules of the system. 1104 * <p> 1105 * The wildcard matcher uses the characters '?' 
and '*' to represent a 1106 * single or multiple wildcard characters. 1107 * This is the same as often found on Dos/Unix command lines. 1108 * The check is case-sensitive on Unix and case-insensitive on Windows. 1109 * <pre> 1110 * wildcardMatch("c.txt", "*.txt") --> true 1111 * wildcardMatch("c.txt", "*.jpg") --> false 1112 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1113 * wildcardMatch("c.txt", "*.???") --> true 1114 * wildcardMatch("c.txt", "*.????") --> false 1115 * </pre> 1116 * 1117 * @param filename the filename to match on 1118 * @param wildcardMatcher the wildcard string to match against 1119 * @return true if the filename matches the wilcard string 1120 * @see IOCase#SYSTEM 1121 */ 1122 public static boolean wildcardMatchOnSystem(String filename, String wildcardMatcher) { 1123 return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM); 1124 } 1125 1126 /** 1127 * Checks a filename to see if it matches the specified wildcard matcher 1128 * allowing control over case-sensitivity. 1129 * <p> 1130 * The wildcard matcher uses the characters '?' and '*' to represent a 1131 * single or multiple wildcard characters. 
1132 * 1133 * @param filename the filename to match on 1134 * @param wildcardMatcher the wildcard string to match against 1135 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 1136 * @return true if the filename matches the wilcard string 1137 * @since Commons IO 1.3 1138 */ 1139 public static boolean wildcardMatch(String filename, String wildcardMatcher, IOCase caseSensitivity) { 1140 if (filename == null && wildcardMatcher == null) { 1141 return true; 1142 } 1143 if (filename == null || wildcardMatcher == null) { 1144 return false; 1145 } 1146 if (caseSensitivity == null) { 1147 caseSensitivity = IOCase.SENSITIVE; 1148 } 1149 filename = caseSensitivity.convertCase(filename); 1150 wildcardMatcher = caseSensitivity.convertCase(wildcardMatcher); 1151 String[] wcs = splitOnTokens(wildcardMatcher); 1152 boolean anyChars = false; 1153 int textIdx = 0; 1154 int wcsIdx = 0; 1155 Stack<int[]> backtrack = new Stack<int[]>(); 1156 1157 // loop around a backtrack stack, to handle complex * matching 1158 do { 1159 if (backtrack.size() > 0) { 1160 int[] array = backtrack.pop(); 1161 wcsIdx = array[0]; 1162 textIdx = array[1]; 1163 anyChars = true; 1164 } 1165 1166 // loop whilst tokens and text left to process 1167 while (wcsIdx < wcs.length) { 1168 1169 if (wcs[wcsIdx].equals("?")) { 1170 // ? 
so move to next text char 1171 textIdx++; 1172 anyChars = false; 1173 1174 } else if (wcs[wcsIdx].equals("*")) { 1175 // set any chars status 1176 anyChars = true; 1177 if (wcsIdx == wcs.length - 1) { 1178 textIdx = filename.length(); 1179 } 1180 1181 } else { 1182 // matching text token 1183 if (anyChars) { 1184 // any chars then try to locate text token 1185 textIdx = filename.indexOf(wcs[wcsIdx], textIdx); 1186 if (textIdx == -1) { 1187 // token not found 1188 break; 1189 } 1190 int repeat = filename.indexOf(wcs[wcsIdx], textIdx + 1); 1191 if (repeat >= 0) { 1192 backtrack.push(new int[] {wcsIdx, repeat}); 1193 } 1194 } else { 1195 // matching from current position 1196 if (!filename.startsWith(wcs[wcsIdx], textIdx)) { 1197 // couldnt match token 1198 break; 1199 } 1200 } 1201 1202 // matched text token, move text index to end of matched token 1203 textIdx += wcs[wcsIdx].length(); 1204 anyChars = false; 1205 } 1206 1207 wcsIdx++; 1208 } 1209 1210 // full match 1211 if (wcsIdx == wcs.length && textIdx == filename.length()) { 1212 return true; 1213 } 1214 1215 } while (backtrack.size() > 0); 1216 1217 return false; 1218 } 1219 1220 /** 1221 * Splits a string into a number of tokens. 1222 * 1223 * @param text the text to split 1224 * @return the tokens, never null 1225 */ 1226 static String[] splitOnTokens(String text) { 1227 // used by wildcardMatch 1228 // package level so a unit test may run on this 1229 1230 if (text.indexOf("?") == -1 && text.indexOf("*") == -1) { 1231 return new String[] { text }; 1232 } 1233 1234 char[] array = text.toCharArray(); 1235 ArrayList<String> list = new ArrayList<String>(); 1236 StringBuffer buffer = new StringBuffer(); 1237 for (int i = 0; i < array.length; i++) { 1238 if (array[i] == '?' 
|| array[i] == '*') { 1239 if (buffer.length() != 0) { 1240 list.add(buffer.toString()); 1241 buffer.setLength(0); 1242 } 1243 if (array[i] == '?') { 1244 list.add("?"); 1245 } else if (list.size() == 0 || 1246 (i > 0 && list.get(list.size() - 1).equals("*") == false)) { 1247 list.add("*"); 1248 } 1249 } else { 1250 buffer.append(array[i]); 1251 } 1252 } 1253 if (buffer.length() != 0) { 1254 list.add(buffer.toString()); 1255 } 1256 1257 return list.toArray( new String[ list.size() ] ); 1258 } 1259 1260 } 1261