/*
 * Conditions Of Use
 *
 * This software was developed by employees of the National Institute of
 * Standards and Technology (NIST), an agency of the Federal Government.
 * Pursuant to title 15 United States Code Section 105, works of NIST
 * employees are not subject to copyright protection in the United States
 * and are considered to be in the public domain.  As a result, a formal
 * license is not needed to use the software.
 *
 * This software is provided by NIST as a service and is expressly
 * provided "AS IS."  NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED
 * OR STATUTORY, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT
 * AND DATA ACCURACY.  NIST does not warrant or make any representations
 * regarding the use of the software or the results thereof, including but
 * not limited to the correctness, accuracy, reliability or usefulness of
 * the software.
 *
 * Permission to use this software is contingent upon your acceptance
 * of the terms of this agreement
 *
 * .
 *
 */
package gov.nist.javax.sip.parser;

import gov.nist.core.HostNameParser;
import gov.nist.core.HostPort;
import gov.nist.core.NameValue;
import gov.nist.core.NameValueList;
import gov.nist.core.Token;
import gov.nist.javax.sip.address.GenericURI;
import gov.nist.javax.sip.address.SipUri;
import gov.nist.javax.sip.address.TelURLImpl;
import gov.nist.javax.sip.address.TelephoneNumber;
import java.text.ParseException;

/**
 * Parser For SIP and Tel URLs. Other kinds of URL's are handled by the
 * J2SE 1.4 URL class.
 *
 * <p>SIP/SIPS URLs are parsed into {@link SipUri}, tel URLs into
 * {@link TelURLImpl}; anything else is collected as a string and wrapped
 * in a {@link GenericURI} without further parsing.
 *
 * @version 1.2 $Revision: 1.27 $ $Date: 2009/10/22 10:27:39 $
 *
 * @author M. Ranganathan <br/>
 */
public class URLParser extends Parser {

    /**
     * Creates a parser over the given URL text using the "sip_urlLexer".
     *
     * @param url the text to parse.
     */
    public URLParser(String url) {
        this.lexer = new Lexer("sip_urlLexer", url);
    }

    /**
     * Creates a parser that continues on an existing lexer, switching it
     * to the "sip_urlLexer".
     *
     * @param lexer the lexer to read from.
     */
    // public tag added - issued by Miguel Freitas
    public URLParser(Lexer lexer) {
        this.lexer = lexer;
        this.lexer.selectLexer("sip_urlLexer");
    }

    /**
     * @param next character to classify.
     * @return true if the character is one of <code>- _ . ! ~ * ' ( )</code>.
     */
    protected static boolean isMark(char next) {
        switch (next) {
            case '-':
            case '_':
            case '.':
            case '!':
            case '~':
            case '*':
            case '\'':
            case '(':
            case ')':
                return true;
            default:
                return false;
        }
    }

    /**
     * @param next character to classify.
     * @return true if the character is alphanumeric or a mark character.
     */
    protected static boolean isUnreserved(char next) {
        return Lexer.isAlphaDigit(next) || isMark(next);
    }

    /**
     * @param next character to classify.
     * @return true if the character is one of
     *         <code>; ? : @ &amp; + $ ,</code> (note: neither '/' nor '=').
     */
    protected static boolean isReservedNoSlash(char next) {
        switch (next) {
            case ';':
            case '?':
            case ':':
            case '@':
            case '&':
            case '+':
            case '$':
            case ',':
                return true;
            default:
                return false;
        }
    }

    /**
     * Extra characters (beyond the unreserved set) accepted by
     * {@link #user()}.
     *
     * @param la character to classify.
     * @return true if the character is one of
     *         <code>&amp; ? + $ # / , ; =</code>.
     */
    // Missing '=' bug in character set - discovered by interop testing
    // at SIPIT 13 by Bob Johnson and Scott Holben.
    // change . to ; by Bruno Konik
    protected static boolean isUserUnreserved(char la) {
        switch (la) {
            case '&':
            case '?':
            case '+':
            case '$':
            case '#':
            case '/':
            case ',':
            case ';':
            case '=':
                return true;
            default:
                return false;
        }
    }

    /**
     * Consumes a single unreserved character.
     *
     * @return the consumed character as a string.
     * @throws ParseException if the next character is not unreserved.
     */
    protected String unreserved() throws ParseException {
        char next = lexer.lookAhead(0);
        if (isUnreserved(next)) {
            lexer.consume(1);
            return String.valueOf(next);
        }
        throw createParseException("unreserved");
    }

    /**
     * Name or value of a parameter. Consumes characters while they are
     * unreserved, one of <code>[ ] / : &amp; + $</code>, or a
     * percent-escape (%HH), and returns the consumed slice of the input.
     *
     * @return the consumed text; empty if nothing matched.
     * @throws ParseException propagated from the lexer.
     */
    protected String paramNameOrValue() throws ParseException {
        int startIdx = lexer.getPtr();
        while (lexer.hasMoreChars()) {
            char next = lexer.lookAhead(0);
            boolean isValidChar = false;
            switch (next) {
                case '[':
                case ']': // JvB: fixed this one
                case '/':
                case ':':
                case '&':
                case '+':
                case '$':
                    isValidChar = true;
                    break;
                default:
                    break;
            }
            if (isValidChar || isUnreserved(next)) {
                lexer.consume(1);
            } else if (isEscaped()) {
                lexer.consume(3);
            } else {
                break;
            }
        }
        return lexer.getBuffer().substring(startIdx, lexer.getPtr());
    }

    /**
     * Parses one URI parameter of the form <code>name</code> or
     * <code>name=value</code>.
     *
     * @return the parameter, or null when both name and value are empty.
     * @throws ParseException propagated from the lexer.
     */
    private NameValue uriParam() throws ParseException {
        if (debug) {
            dbg_enter("uriParam");
        }
        try {
            String pvalue = "";
            String pname = paramNameOrValue();
            boolean isFlagParam = true;
            if (lexer.lookAhead(0) == '=') {
                lexer.consume(1);
                pvalue = paramNameOrValue();
                isFlagParam = false;
            }
            // pvalue is never null here: it is either "" or the (non-null)
            // substring returned by paramNameOrValue().
            if (pname.length() == 0 && pvalue.length() == 0) {
                return null;
            }
            return new NameValue(pname, pvalue, isFlagParam);
        } finally {
            if (debug) {
                dbg_leave("uriParam");
            }
        }
    }

    /**
     * @param next character to classify.
     * @return true if the character is one of
     *         <code>; / ? : = @ &amp; + $ ,</code>.
     */
    protected static boolean isReserved(char next) {
        switch (next) {
            case ';':
            case '/':
            case '?':
            case ':':
            case '=': // Bug fix by Bruno Konik
            case '@':
            case '&':
            case '+':
            case '$':
            case ',':
                return true;
            default:
                return false;
        }
    }

    /**
     * Consumes a single reserved character.
     *
     * @return the consumed character as a string.
     * @throws ParseException if the next character is not reserved.
     */
    protected String reserved() throws ParseException {
        char next = lexer.lookAhead(0);
        if (isReserved(next)) {
            lexer.consume(1);
            return String.valueOf(next);
        }
        throw createParseException("reserved");
    }

    /**
     * Checks, without consuming anything, whether the input at the current
     * position is a percent-escape: '%' followed by two hex digits.
     *
     * @return true if an escape sequence is next; false otherwise,
     *         including when the look-ahead itself fails.
     */
    protected boolean isEscaped() {
        try {
            return lexer.lookAhead(0) == '%'
                    && Lexer.isHexDigit(lexer.lookAhead(1))
                    && Lexer.isHexDigit(lexer.lookAhead(2));
        } catch (Exception ex) {
            // Deliberate: any look-ahead failure simply means "not an escape".
            return false;
        }
    }

    /**
     * Consumes a percent-escape (%HH).
     *
     * @return the three consumed characters.
     * @throws ParseException if the input is not '%' plus two hex digits.
     */
    protected String escaped() throws ParseException {
        if (debug) {
            dbg_enter("escaped");
        }
        try {
            char next = lexer.lookAhead(0);
            char next1 = lexer.lookAhead(1);
            char next2 = lexer.lookAhead(2);
            if (next == '%'
                    && Lexer.isHexDigit(next1)
                    && Lexer.isHexDigit(next2)) {
                lexer.consume(3);
                StringBuffer retval = new StringBuffer();
                retval.append(next);
                retval.append(next1);
                retval.append(next2);
                return retval.toString();
            }
            throw createParseException("escaped");
        } finally {
            if (debug) {
                dbg_leave("escaped");
            }
        }
    }

    /**
     * Consumes a single mark character (see {@link #isMark(char)}).
     *
     * @return the consumed character as a string.
     * @throws ParseException if the next character is not a mark.
     */
    protected String mark() throws ParseException {
        if (debug) {
            dbg_enter("mark");
        }
        try {
            char next = lexer.lookAhead(0);
            if (isMark(next)) {
                lexer.consume(1);
                return String.valueOf(next);
            }
            throw createParseException("mark");
        } finally {
            if (debug) {
                dbg_leave("mark");
            }
        }
    }

    /**
     * Consumes one "uric" unit: an unreserved character, a reserved
     * character, or a percent-escape.
     *
     * @return the consumed text, or null if the input does not start with
     *         a uric unit or the lexer raised any exception.
     */
    protected String uric() {
        if (debug) {
            dbg_enter("uric");
        }
        try {
            char la = lexer.lookAhead(0);
            if (isUnreserved(la)) {
                lexer.consume(1);
                return Lexer.charAsString(la);
            } else if (isReserved(la)) {
                lexer.consume(1);
                return Lexer.charAsString(la);
            } else if (isEscaped()) {
                String retval = lexer.charAsString(3);
                lexer.consume(3);
                return retval;
            } else {
                return null;
            }
        } catch (Exception ex) {
            // Deliberate best-effort: a lexer failure is reported as "no match".
            return null;
        } finally {
            if (debug) {
                dbg_leave("uric");
            }
        }
    }

    /**
     * Like {@link #uric()}, but the accepted reserved characters are those
     * of {@link #isReservedNoSlash(char)}, and the escape check is made
     * first.
     *
     * @return the consumed text, or null if nothing matched or the lexer
     *         raised a ParseException.
     */
    protected String uricNoSlash() {
        if (debug) {
            dbg_enter("uricNoSlash");
        }
        try {
            char la = lexer.lookAhead(0);
            if (isEscaped()) {
                String retval = lexer.charAsString(3);
                lexer.consume(3);
                return retval;
            } else if (isUnreserved(la)) {
                lexer.consume(1);
                return Lexer.charAsString(la);
            } else if (isReservedNoSlash(la)) {
                lexer.consume(1);
                return Lexer.charAsString(la);
            } else {
                return null;
            }
        } catch (ParseException ex) {
            // Deliberate best-effort: a lexer failure is reported as "no match".
            return null;
        } finally {
            if (debug) {
                dbg_leave("uricNoSlash");
            }
        }
    }

    /**
     * Consumes the longest run of uric units, additionally accepting a
     * bracketed host (parsed via {@link HostNameParser}) wherever a '['
     * is encountered.
     *
     * @return the consumed text (possibly empty).
     * @throws ParseException if a bracketed host fails to parse.
     */
    protected String uricString() throws ParseException {
        StringBuffer retval = new StringBuffer();
        while (true) {
            String next = uric();
            if (next != null) {
                retval.append(next);
                continue;
            }
            // JvB: allow IPv6 addresses in generic URI strings
            // e.g. http://[::1]
            if (lexer.lookAhead(0) != '[') {
                break;
            }
            HostNameParser hnp = new HostNameParser(this.getLexer());
            HostPort hp = hnp.hostPort(false);
            retval.append(hp.toString());
        }
        return retval.toString();
    }

    /**
     * Parse and return a structure for a generic URL.
     * Note that non SIP URLs are just stored as a string (not parsed).
     *
     * @param inBrackets passed through to {@link #sipURL(boolean)} /
     *        {@link #telURL(boolean)}; when false, ';' parameters are not
     *        consumed as part of the URL.
     * @return a SipUri for sip:/sips:, a TelURLImpl for tel:, otherwise a
     *         GenericURI built from the raw string.
     * @throws ParseException if there was a problem parsing.
     */
    public GenericURI uriReference(boolean inBrackets) throws ParseException {
        if (debug) {
            dbg_enter("uriReference");
        }
        GenericURI retval = null;
        Token[] tokens = lexer.peekNextToken(2);
        Token t1 = tokens[0];
        Token t2 = tokens[1];
        try {
            if (t1.getTokenType() == TokenTypes.SIP
                    || t1.getTokenType() == TokenTypes.SIPS) {
                if (t2.getTokenType() == ':') {
                    retval = sipURL(inBrackets);
                } else {
                    throw createParseException("Expecting \':\'");
                }
            } else if (t1.getTokenType() == TokenTypes.TEL) {
                if (t2.getTokenType() == ':') {
                    retval = telURL(inBrackets);
                } else {
                    throw createParseException("Expecting \':\'");
                }
            } else {
                String urlString = uricString();
                try {
                    retval = new GenericURI(urlString);
                } catch (ParseException ex) {
                    // Re-create with this parser's context, keeping the cause.
                    ParseException wrapped = createParseException(ex.getMessage());
                    wrapped.initCause(ex);
                    throw wrapped;
                }
            }
        } finally {
            if (debug) {
                dbg_leave("uriReference");
            }
        }
        return retval;
    }

    /**
     * Parser for the base phone number: a non-empty run of digits and
     * <code>- . ( )</code>.
     *
     * @return the consumed text; empty only if the input was already
     *         exhausted.
     * @throws ParseException if the first character is not acceptable.
     */
    private String base_phone_number() throws ParseException {
        StringBuffer s = new StringBuffer();

        if (debug) {
            dbg_enter("base_phone_number");
        }
        try {
            int lc = 0;
            while (lexer.hasMoreChars()) {
                char w = lexer.lookAhead(0);
                if (Lexer.isDigit(w)
                        || w == '-'
                        || w == '.'
                        || w == '('
                        || w == ')') {
                    lexer.consume(1);
                    s.append(w);
                    lc++;
                } else if (lc > 0) {
                    break;
                } else {
                    throw createParseException("unexpected " + w);
                }
            }
            return s.toString();
        } finally {
            if (debug) {
                dbg_leave("base_phone_number");
            }
        }
    }

    /**
     * Parser for the local phone #: a non-empty run of hex digits and
     * <code>* # - . ( )</code>.
     *
     * @return the consumed text; empty only if the input was already
     *         exhausted.
     * @throws ParseException if the first character is not acceptable.
     */
    private String local_number() throws ParseException {
        StringBuffer s = new StringBuffer();
        if (debug) {
            dbg_enter("local_number");
        }
        try {
            int lc = 0;
            while (lexer.hasMoreChars()) {
                char la = lexer.lookAhead(0);
                if (la == '*'
                        || la == '#'
                        || la == '-'
                        || la == '.'
                        || la == '('
                        || la == ')'
                        // JvB: allow 'A'..'F', should be uppercase
                        || Lexer.isHexDigit(la)) {
                    lexer.consume(1);
                    s.append(la);
                    lc++;
                } else if (lc > 0) {
                    break;
                } else {
                    throw createParseException("unexepcted " + la);
                }
            }
            return s.toString();
        } finally {
            if (debug) {
                dbg_leave("local_number");
            }
        }
    }

    /**
     * Parser for telephone subscriber. Switches the lexer to "charLexer",
     * then parses a global number if the input starts with '+', otherwise
     * a local number.
     *
     * @param inBrackets when false, a trailing ';' parameter list is left
     *        unconsumed.
     * @return the parsed telephone number.
     * @throws ParseException if the first character cannot start a number.
     */
    public final TelephoneNumber parseTelephoneNumber(boolean inBrackets)
            throws ParseException {
        TelephoneNumber tn;

        if (debug) {
            dbg_enter("telephone_subscriber");
        }
        lexer.selectLexer("charLexer");
        try {
            char c = lexer.lookAhead(0);
            if (c == '+') {
                tn = global_phone_number(inBrackets);
            } else if (Lexer.isHexDigit(c) // see RFC3966
                    || c == '#'
                    || c == '*'
                    || c == '-'
                    || c == '.'
                    || c == '('
                    || c == ')') {
                tn = local_phone_number(inBrackets);
            } else {
                throw createParseException("unexpected char " + c);
            }
            return tn;
        } finally {
            if (debug) {
                dbg_leave("telephone_subscriber");
            }
        }
    }

    /**
     * Parses '+' followed by a base phone number and, when in brackets,
     * an optional ';'-introduced parameter list.
     */
    private final TelephoneNumber global_phone_number(boolean inBrackets)
            throws ParseException {
        if (debug) {
            dbg_enter("global_phone_number");
        }
        try {
            TelephoneNumber tn = new TelephoneNumber();
            tn.setGlobal(true);
            this.lexer.match(PLUS);
            tn.setPhoneNumber(base_phone_number());
            if (lexer.hasMoreChars()
                    && lexer.lookAhead(0) == ';'
                    && inBrackets) {
                this.lexer.consume(1);
                NameValueList nv = tel_parameters();
                tn.setParameters(nv);
            }
            return tn;
        } finally {
            if (debug) {
                dbg_leave("global_phone_number");
            }
        }
    }

    /**
     * Parses a local number and, when in brackets, an optional
     * ';'-introduced parameter list.
     */
    private TelephoneNumber local_phone_number(boolean inBrackets)
            throws ParseException {
        if (debug) {
            dbg_enter("local_phone_number");
        }
        TelephoneNumber tn = new TelephoneNumber();
        tn.setGlobal(false);
        try {
            tn.setPhoneNumber(local_number());
            if (lexer.hasMoreChars()) {
                Token tok = this.lexer.peekNextToken();
                if (tok.getTokenType() == SEMICOLON && inBrackets) {
                    this.lexer.consume(1);
                    NameValueList nv = tel_parameters();
                    tn.setParameters(nv);
                }
            }
        } finally {
            if (debug) {
                dbg_leave("local_phone_number");
            }
        }
        return tn;
    }

    /**
     * Parses a ';'-separated list of tel URL parameters. The leading ';'
     * must already have been consumed by the caller.
     *
     * @return the parameters found, in a NameValueList.
     * @throws ParseException propagated from the lexer / phone_context.
     */
    private NameValueList tel_parameters() throws ParseException {
        NameValueList nvList = new NameValueList();

        // JvB: Need to handle 'phone-context' specially
        // 'isub' (or 'ext') MUST appear first, but we accept any order here
        while (true) {
            String pname = paramNameOrValue();
            NameValue nv;

            // Handle 'phone-context' specially, it may start with '+'
            if (pname.equalsIgnoreCase("phone-context")) {
                nv = phone_context();
            } else if (lexer.lookAhead(0) == '=') {
                lexer.consume(1);
                String value = paramNameOrValue();
                nv = new NameValue(pname, value, false);
            } else {
                nv = new NameValue(pname, "", true); // flag param
            }
            nvList.set(nv);

            if (lexer.lookAhead(0) != ';') {
                return nvList;
            }
            lexer.consume(1);
        }
    }

    /**
     * Parses the 'phone-context' parameter in tel: URLs. Expects the
     * lexer to be positioned at the '=' following the parameter name.
     *
     * @return a "phone-context" NameValue whose value is either
     *         "+" plus a base phone number, or an identifier token.
     * @throws ParseException if '=' is missing or the value starts with a
     *         character that is neither '+' nor alphanumeric.
     */
    private NameValue phone_context() throws ParseException {
        lexer.match('=');

        char la = lexer.lookAhead(0);
        Object value;
        if (la == '+') { // global-number-digits
            lexer.consume(1); // skip '+'
            value = "+" + base_phone_number();
        } else if (Lexer.isAlphaDigit(la)) {
            Token t = lexer.match(Lexer.ID); // more broad than allowed
            value = t.getTokenValue();
        } else {
            throw new ParseException("Invalid phone-context:" + la, -1);
        }
        return new NameValue("phone-context", value, false);
    }

    /**
     * Parse and return a structure for a Tel URL.
     *
     * @param inBrackets passed to {@link #parseTelephoneNumber(boolean)}.
     * @return a parsed tel url structure.
     * @throws ParseException if there was a problem parsing.
     */
    public TelURLImpl telURL(boolean inBrackets) throws ParseException {
        lexer.match(TokenTypes.TEL);
        lexer.match(':');
        TelephoneNumber tn = this.parseTelephoneNumber(inBrackets);
        TelURLImpl telUrl = new TelURLImpl();
        telUrl.setTelephoneNumber(tn);
        return telUrl;
    }

    /**
     * Parse and return a structure for a SIP URL.
     *
     * @param inBrackets when false, ';' parameters following the host are
     *        not consumed (they belong to the enclosing header).
     * @return a URL structure for a SIP url.
     * @throws ParseException if there was a problem parsing.
     */
    public SipUri sipURL(boolean inBrackets) throws ParseException {
        if (debug) {
            dbg_enter("sipURL");
        }
        SipUri retval = new SipUri();
        // pmusgrave - handle sips case
        Token nextToken = lexer.peekNextToken();
        int sipOrSips = TokenTypes.SIP;
        String scheme = TokenNames.SIP;
        if (nextToken.getTokenType() == TokenTypes.SIPS) {
            sipOrSips = TokenTypes.SIPS;
            scheme = TokenNames.SIPS;
        }

        try {
            lexer.match(sipOrSips);
            lexer.match(':');
            retval.setScheme(scheme);
            int startOfUser = lexer.markInputPosition();
            String userOrHost = user(); // Note: user may contain ';', host may not...
            String passOrPort = null;

            // name:password or host:port
            if (lexer.lookAhead() == ':') {
                lexer.consume(1);
                passOrPort = password();
            }

            // name@hostPort
            if (lexer.lookAhead() == '@') {
                lexer.consume(1);
                retval.setUser(userOrHost);
                if (passOrPort != null) {
                    retval.setUserPassword(passOrPort);
                }
            } else {
                // then userOrHost was a host, backtrack just in case a ';' was eaten...
                lexer.rewindInputPosition(startOfUser);
            }

            HostNameParser hnp = new HostNameParser(this.getLexer());
            HostPort hp = hnp.hostPort(false);
            retval.setHostPort(hp);

            lexer.selectLexer("charLexer");
            while (lexer.hasMoreChars()) {
                // If the URI is not enclosed in brackets, parameters belong to header
                if (lexer.lookAhead(0) != ';' || !inBrackets) {
                    break;
                }
                lexer.consume(1);
                NameValue parms = uriParam();
                if (parms != null) {
                    retval.setUriParameter(parms);
                }
            }

            if (lexer.hasMoreChars() && lexer.lookAhead(0) == '?') {
                lexer.consume(1);
                while (lexer.hasMoreChars()) {
                    NameValue parms = qheader();
                    retval.setQHeader(parms);
                    // Stop at end of input or at anything other than the
                    // '&' separator; only an actual '&' is consumed, so the
                    // lexer is never advanced past the end of the buffer.
                    if (!lexer.hasMoreChars() || lexer.lookAhead(0) != '&') {
                        break;
                    }
                    lexer.consume(1);
                }
            }
            return retval;
        } finally {
            if (debug) {
                dbg_leave("sipURL");
            }
        }
    }

    /**
     * Peeks at the first token of the input without consuming it.
     *
     * @return the value of the first token, or null if the lexer returned
     *         no tokens.
     * @throws ParseException propagated from the lexer.
     */
    public String peekScheme() throws ParseException {
        Token[] tokens = lexer.peekNextToken(1);
        if (tokens.length == 0) {
            return null;
        }
        return tokens[0].getTokenValue();
    }

    /**
     * Get a name value for a given query header (ie one that comes
     * after the ?).
     *
     * @return the header as a (name, value) pair.
     * @throws ParseException propagated from the lexer / hvalue.
     */
    protected NameValue qheader() throws ParseException {
        String name = lexer.getNextToken('=');
        lexer.consume(1);
        String value = hvalue();
        return new NameValue(name, value, false);
    }

    /**
     * Consumes a query header value: alphanumerics, the characters
     * <code>+ ? : [ ] / $ _ - " ! ~ * . ( )</code>, and percent-escapes.
     * Stops at the first character outside that set.
     *
     * @return the consumed text (possibly empty).
     * @throws ParseException if a '%' is not followed by two hex digits.
     */
    protected String hvalue() throws ParseException {
        StringBuffer retval = new StringBuffer();
        while (lexer.hasMoreChars()) {
            char la = lexer.lookAhead(0);
            boolean isValidChar = false;
            switch (la) {
                case '+':
                case '?':
                case ':':
                case '[':
                case ']':
                case '/':
                case '$':
                case '_':
                case '-':
                case '"':
                case '!':
                case '~':
                case '*':
                case '.':
                case '(':
                case ')':
                    isValidChar = true;
                    break;
                default:
                    break;
            }
            if (isValidChar || Lexer.isAlphaDigit(la)) {
                lexer.consume(1);
                retval.append(la);
            } else if (la == '%') {
                retval.append(escaped());
            } else {
                break;
            }
        }
        return retval.toString();
    }

    /**
     * Scan forward until you hit a terminating character for a URL
     * (space, tab, newline, '&gt;' or '&lt;') or the end of input.
     * We do not handle non sip urls in this implementation.
     *
     * @return the string that takes us to the end of this URL (i.e. to
     *         the next delimiter).
     * @throws ParseException propagated from the lexer.
     */
    protected String urlString() throws ParseException {
        StringBuffer retval = new StringBuffer();
        lexer.selectLexer("charLexer");

        while (lexer.hasMoreChars()) {
            char la = lexer.lookAhead(0);
            // Look for a character that can terminate a URL.
            if (la == ' '
                    || la == '\t'
                    || la == '\n'
                    || la == '>'
                    || la == '<') {
                break;
            }
            // Advance by one character; consuming zero here would spin forever.
            lexer.consume(1);
            retval.append(la);
        }
        return retval.toString();
    }

    /**
     * Consumes the user part: unreserved characters, the characters of
     * {@link #isUserUnreserved(char)}, and percent-escapes.
     *
     * @return the consumed text (possibly empty).
     * @throws ParseException propagated from the lexer.
     */
    protected String user() throws ParseException {
        if (debug) {
            dbg_enter("user");
        }
        try {
            int startIdx = lexer.getPtr();
            while (lexer.hasMoreChars()) {
                char la = lexer.lookAhead(0);
                if (isUnreserved(la) || isUserUnreserved(la)) {
                    lexer.consume(1);
                } else if (isEscaped()) {
                    lexer.consume(3);
                } else {
                    break;
                }
            }
            return lexer.getBuffer().substring(startIdx, lexer.getPtr());
        } finally {
            if (debug) {
                dbg_leave("user");
            }
        }
    }

    /**
     * Consumes the password part: unreserved characters, the characters
     * <code>&amp; = + $ ,</code>, and percent-escapes.
     *
     * @return the consumed text (possibly empty).
     * @throws ParseException propagated from the lexer.
     */
    protected String password() throws ParseException {
        int startIdx = lexer.getPtr();
        // Bounded by hasMoreChars(), like user(), rather than relying on
        // what lookAhead returns once the input is exhausted.
        while (lexer.hasMoreChars()) {
            char la = lexer.lookAhead(0);
            boolean isValidChar = false;
            switch (la) {
                case '&':
                case '=':
                case '+':
                case '$':
                case ',':
                    isValidChar = true;
                    break;
                default:
                    break;
            }
            if (isValidChar || isUnreserved(la)) {
                lexer.consume(1);
            } else if (isEscaped()) {
                lexer.consume(3); // bug reported by
                                  // Jeff Haynie
            } else {
                break;
            }
        }
        return lexer.getBuffer().substring(startIdx, lexer.getPtr());
    }

    /**
     * Default parse method. This method just calls uriReference with
     * inBrackets set to true.
     *
     * @return the parsed URI.
     * @throws ParseException if there was a problem parsing.
     */
    public GenericURI parse() throws ParseException {
        return uriReference(true);
    }

    // quick test routine for debugging type assignment
    public static void main(String[] args) throws ParseException {
        // quick test for sips parsing
        String[] test = {
            "sip:alice@example.com",
            "sips:alice@examples.com",
            "sip:3Zqkv5dajqaaas0tCjCxT0xH2ZEuEMsFl0xoasip%3A%2B3519116786244%40siplab.domain.com@213.0.115.163:7070"
        };

        for (int i = 0; i < test.length; i++) {
            URLParser p = new URLParser(test[i]);

            GenericURI uri = p.parse();
            System.out.println("uri type returned " + uri.getClass().getName());
            System.out.println(test[i] + " is SipUri? " + uri.isSipURI()
                    + ">" + uri.encode());
        }
    }
}