001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 /* 019 * $Id: URI.java 468654 2006-10-28 07:09:23Z minchau $ 020 */ 021 package org.apache.xml.serializer.utils; 022 023 import java.io.IOException; 024 import java.io.Serializable; 025 026 027 /** 028 * A class to represent a Uniform Resource Identifier (URI). This class 029 * is designed to handle the parsing of URIs and provide access to 030 * the various components (scheme, host, port, userinfo, path, query 031 * string and fragment) that may constitute a URI. 032 * <p> 033 * Parsing of a URI specification is done according to the URI 034 * syntax described in RFC 2396 035 * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists 036 * of a scheme, followed by a colon (':'), followed by a scheme-specific 037 * part. For URIs that follow the "generic URI" syntax, the scheme- 038 * specific part begins with two slashes ("//") and may be followed 039 * by an authority segment (comprised of user information, host, and 040 * port), path segment, query segment and fragment. 
/**
 * A class to represent a Uniform Resource Identifier (URI). This class
 * is designed to handle the parsing of URIs and provide access to
 * the various components (scheme, host, port, userinfo, path, query
 * string and fragment) that may constitute a URI.
 * <p>
 * Parsing of a URI specification is done according to the URI
 * syntax described in RFC 2396
 * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists
 * of a scheme, followed by a colon (':'), followed by a scheme-specific
 * part. For URIs that follow the "generic URI" syntax, the scheme-
 * specific part begins with two slashes ("//") and may be followed
 * by an authority segment (comprised of user information, host, and
 * port), path segment, query segment and fragment. Note that RFC 2396
 * no longer specifies the use of the parameters segment and excludes
 * the "user:password" syntax as part of the authority segment. If
 * "user:password" appears in a URI, the entire user/password string
 * is stored as userinfo.
 * <p>
 * For URIs that do not follow the "generic URI" syntax (e.g. mailto),
 * the entire scheme-specific part is treated as the "path" portion
 * of the URI.
 * <p>
 * Note that, unlike the java.net.URL class, this class does not provide
 * any built-in network access functionality nor does it provide any
 * scheme-specific functionality (for example, it does not know a
 * default port for a specific scheme). Rather, it only knows the
 * grammar and basic set of operations that can be applied to a URI.
 * <p>
 * This class is a copy of the one in org.apache.xml.utils.
 * It exists to cut the serializer's dependency on that package.
 * <p>
 * A minor change from the original is that this class no longer implements
 * Serializable, and the serialVersionUID magic field is dropped, and
 * the class is no longer "public".
 *
 * @xsl.usage internal
 */
final class URI
{
  /**
   * MalformedURIExceptions are thrown in the process of building a URI
   * or setting fields on a URI when an operation would result in an
   * invalid URI specification. Being an {@link IOException}, it is a
   * checked exception.
   */
  public static class MalformedURIException extends IOException
  {

    /**
     * Constructs a <code>MalformedURIException</code> with no specified
     * detail message.
     */
    public MalformedURIException()
    {
      super();
    }

    /**
     * Constructs a <code>MalformedURIException</code> with the
     * specified detail message.
     *
     * @param p_msg the detail message.
     */
    public MalformedURIException(String p_msg)
    {
      super(p_msg);
    }
  }

  /** reserved characters */
  private static final String RESERVED_CHARACTERS = ";/?:@&=+$,";

  /**
   * URI punctuation mark characters - these, combined with
   * alphanumerics, constitute the "unreserved" characters.
   * Note that the literal ends with a space character.
   */
  private static final String MARK_CHARACTERS = "-_.!~*'() ";

  /** scheme can be composed of alphanumerics and these characters */
  private static final String SCHEME_CHARACTERS = "+-.";

  /**
   * userinfo can be composed of unreserved, escaped and these
   * characters
   */
  private static final String USERINFO_CHARACTERS = ";:&=+$,";

  /** Stores the scheme (usually the protocol) for this URI.
   *  @serial */
  private String m_scheme = null;

  /** If specified, stores the userinfo for this URI; otherwise null.
   *  @serial */
  private String m_userinfo = null;

  /** If specified, stores the host for this URI; otherwise null.
   *  @serial */
  private String m_host = null;

  /** If specified, stores the port for this URI; otherwise -1.
   *  @serial */
  private int m_port = -1;

  /** If specified, stores the path for this URI; otherwise null.
   *  @serial */
  private String m_path = null;

  /**
   * If specified, stores the query string for this URI; otherwise
   * null.
   * @serial
   */
  private String m_queryString = null;

  /** If specified, stores the fragment for this URI; otherwise null.
   *  @serial */
  private String m_fragment = null;

  /** Indicate whether in DEBUG mode */
  private static boolean DEBUG = false;

  /**
   * Construct a new and uninitialized URI. Every String component is
   * left at null and the port at -1.
   */
  public URI(){}
/**
 * Construct a new URI from another URI. All fields for this URI are
 * set equal to the fields of the URI passed in.
 *
 * @param p_other the URI to copy (cannot be null; the copy reads its
 *                components through its getters without a null check)
 */
public URI(URI p_other)
{
  initialize(p_other);
}

/**
 * Construct a new URI from a URI specification string. If the
 * specification follows the "generic URI" syntax, (two slashes
 * following the first colon), the specification will be parsed
 * accordingly - setting the scheme, userinfo, host, port, path, query
 * string and fragment fields as necessary. If the specification does
 * not follow the "generic URI" syntax, the specification is parsed
 * into a scheme and scheme-specific part (stored as the path) only.
 *
 * @param p_uriSpec the URI specification string (cannot be null or
 *                  empty)
 *
 * @throws MalformedURIException if p_uriSpec violates any syntax
 *                                   rules
 */
public URI(String p_uriSpec) throws MalformedURIException
{
  // The cast selects the (URI, String) constructor; there is no base URI.
  this((URI) null, p_uriSpec);
}

/**
 * Construct a new URI from a base URI and a URI specification string.
 * The URI specification string may be a relative URI.
 *
 * @param p_base the base URI (cannot be null if p_uriSpec is null or
 *               empty)
 * @param p_uriSpec the URI specification string (cannot be null or
 *                  empty if p_base is null)
 *
 * @throws MalformedURIException if p_uriSpec violates any syntax
 *                                  rules
 */
public URI(URI p_base, String p_uriSpec) throws MalformedURIException
{
  initialize(p_base, p_uriSpec);
}
206 * 207 * @param p_scheme the URI scheme (cannot be null or empty) 208 * @param p_schemeSpecificPart the scheme-specific part (cannot be 209 * null or empty) 210 * 211 * @throws MalformedURIException if p_scheme violates any 212 * syntax rules 213 */ 214 public URI(String p_scheme, String p_schemeSpecificPart) 215 throws MalformedURIException 216 { 217 218 if (p_scheme == null || p_scheme.trim().length() == 0) 219 { 220 throw new MalformedURIException( 221 "Cannot construct URI with null/empty scheme!"); 222 } 223 224 if (p_schemeSpecificPart == null 225 || p_schemeSpecificPart.trim().length() == 0) 226 { 227 throw new MalformedURIException( 228 "Cannot construct URI with null/empty scheme-specific part!"); 229 } 230 231 setScheme(p_scheme); 232 setPath(p_schemeSpecificPart); 233 } 234 235 /** 236 * Construct a new URI that follows the generic URI syntax from its 237 * component parts. Each component is validated for syntax and some 238 * basic semantic checks are performed as well. See the individual 239 * setter methods for specifics. 240 * 241 * @param p_scheme the URI scheme (cannot be null or empty) 242 * @param p_host the hostname or IPv4 address for the URI 243 * @param p_path the URI path - if the path contains '?' 
/**
 * Construct a new URI that follows the generic URI syntax from its
 * component parts. Each component is validated for syntax and some
 * basic semantic checks are performed as well.  See the individual
 * setter methods for specifics. This overload delegates to the
 * seven-argument constructor with no userinfo and an unspecified
 * port (-1).
 *
 * @param p_scheme the URI scheme (cannot be null or empty)
 * @param p_host the hostname or IPv4 address for the URI
 * @param p_path the URI path - if the path contains '?' or '#',
 *               then the query string and/or fragment will be
 *               set from the path; however, if the query and
 *               fragment are specified both in the path and as
 *               separate parameters, an exception is thrown
 * @param p_queryString the URI query string (cannot be specified
 *                      if path is null)
 * @param p_fragment the URI fragment (cannot be specified if path
 *                   is null)
 *
 * @throws MalformedURIException if any of the parameters violates
 *                                  syntax rules or semantic rules
 */
public URI(String p_scheme, String p_host, String p_path, String p_queryString, String p_fragment)
        throws MalformedURIException
{
  this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment);
}
or '#', 275 * then the query string and/or fragment will be 276 * set from the path; however, if the query and 277 * fragment are specified both in the path and as 278 * separate parameters, an exception is thrown 279 * @param p_queryString the URI query string (cannot be specified 280 * if path is null) 281 * @param p_fragment the URI fragment (cannot be specified if path 282 * is null) 283 * 284 * @throws MalformedURIException if any of the parameters violates 285 * syntax rules or semantic rules 286 */ 287 public URI(String p_scheme, String p_userinfo, String p_host, int p_port, String p_path, String p_queryString, String p_fragment) 288 throws MalformedURIException 289 { 290 291 if (p_scheme == null || p_scheme.trim().length() == 0) 292 { 293 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_REQUIRED, null)); //"Scheme is required!"); 294 } 295 296 if (p_host == null) 297 { 298 if (p_userinfo != null) 299 { 300 throw new MalformedURIException( 301 Utils.messages.createMessage(MsgKey.ER_NO_USERINFO_IF_NO_HOST, null)); //"Userinfo may not be specified if host is not specified!"); 302 } 303 304 if (p_port != -1) 305 { 306 throw new MalformedURIException( 307 Utils.messages.createMessage(MsgKey.ER_NO_PORT_IF_NO_HOST, null)); //"Port may not be specified if host is not specified!"); 308 } 309 } 310 311 if (p_path != null) 312 { 313 if (p_path.indexOf('?') != -1 && p_queryString != null) 314 { 315 throw new MalformedURIException( 316 Utils.messages.createMessage(MsgKey.ER_NO_QUERY_STRING_IN_PATH, null)); //"Query string cannot be specified in path and query string!"); 317 } 318 319 if (p_path.indexOf('#') != -1 && p_fragment != null) 320 { 321 throw new MalformedURIException( 322 Utils.messages.createMessage(MsgKey.ER_NO_FRAGMENT_STRING_IN_PATH, null)); //"Fragment cannot be specified in both the path and fragment!"); 323 } 324 } 325 326 setScheme(p_scheme); 327 setHost(p_host); 328 setPort(p_port); 329 setUserinfo(p_userinfo); 330 
setPath(p_path); 331 setQueryString(p_queryString); 332 setFragment(p_fragment); 333 } 334 335 /** 336 * Initialize all fields of this URI from another URI. 337 * 338 * @param p_other the URI to copy (cannot be null) 339 */ 340 private void initialize(URI p_other) 341 { 342 343 m_scheme = p_other.getScheme(); 344 m_userinfo = p_other.getUserinfo(); 345 m_host = p_other.getHost(); 346 m_port = p_other.getPort(); 347 m_path = p_other.getPath(); 348 m_queryString = p_other.getQueryString(); 349 m_fragment = p_other.getFragment(); 350 } 351 352 /** 353 * Initializes this URI from a base URI and a URI specification string. 354 * See RFC 2396 Section 4 and Appendix B for specifications on parsing 355 * the URI and Section 5 for specifications on resolving relative URIs 356 * and relative paths. 357 * 358 * @param p_base the base URI (may be null if p_uriSpec is an absolute 359 * URI) 360 * @param p_uriSpec the URI spec string which may be an absolute or 361 * relative URI (can only be null/empty if p_base 362 * is not null) 363 * 364 * @throws MalformedURIException if p_base is null and p_uriSpec 365 * is not an absolute URI or if 366 * p_uriSpec violates syntax rules 367 */ 368 private void initialize(URI p_base, String p_uriSpec) 369 throws MalformedURIException 370 { 371 372 if (p_base == null 373 && (p_uriSpec == null || p_uriSpec.trim().length() == 0)) 374 { 375 throw new MalformedURIException( 376 Utils.messages.createMessage(MsgKey.ER_CANNOT_INIT_URI_EMPTY_PARMS, null)); //"Cannot initialize URI with empty parameters."); 377 } 378 379 // just make a copy of the base if spec is empty 380 if (p_uriSpec == null || p_uriSpec.trim().length() == 0) 381 { 382 initialize(p_base); 383 384 return; 385 } 386 387 String uriSpec = p_uriSpec.trim(); 388 int uriSpecLen = uriSpec.length(); 389 int index = 0; 390 391 // check for scheme 392 int colonIndex = uriSpec.indexOf(':'); 393 if (colonIndex < 0) 394 { 395 if (p_base == null) 396 { 397 throw new 
MalformedURIException(Utils.messages.createMessage(MsgKey.ER_NO_SCHEME_IN_URI, new Object[]{uriSpec})); //"No scheme found in URI: "+uriSpec); 398 } 399 } 400 else 401 { 402 initializeScheme(uriSpec); 403 uriSpec = uriSpec.substring(colonIndex+1); 404 uriSpecLen = uriSpec.length(); 405 } 406 407 // two slashes means generic URI syntax, so we get the authority 408 if (uriSpec.startsWith("//")) 409 { 410 index += 2; 411 412 int startPos = index; 413 414 // get authority - everything up to path, query or fragment 415 char testChar = '\0'; 416 417 while (index < uriSpecLen) 418 { 419 testChar = uriSpec.charAt(index); 420 421 if (testChar == '/' || testChar == '?' || testChar == '#') 422 { 423 break; 424 } 425 426 index++; 427 } 428 429 // if we found authority, parse it out, otherwise we set the 430 // host to empty string 431 if (index > startPos) 432 { 433 initializeAuthority(uriSpec.substring(startPos, index)); 434 } 435 else 436 { 437 m_host = ""; 438 } 439 } 440 441 initializePath(uriSpec.substring(index)); 442 443 // Resolve relative URI to base URI - see RFC 2396 Section 5.2 444 // In some cases, it might make more sense to throw an exception 445 // (when scheme is specified is the string spec and the base URI 446 // is also specified, for example), but we're just following the 447 // RFC specifications 448 if (p_base != null) 449 { 450 451 // check to see if this is the current doc - RFC 2396 5.2 #2 452 // note that this is slightly different from the RFC spec in that 453 // we don't include the check for query string being null 454 // - this handles cases where the urispec is just a query 455 // string or a fragment (e.g. 
"?y" or "#s") - 456 // see <http://www.ics.uci.edu/~fielding/url/test1.html> which 457 // identified this as a bug in the RFC 458 if (m_path.length() == 0 && m_scheme == null && m_host == null) 459 { 460 m_scheme = p_base.getScheme(); 461 m_userinfo = p_base.getUserinfo(); 462 m_host = p_base.getHost(); 463 m_port = p_base.getPort(); 464 m_path = p_base.getPath(); 465 466 if (m_queryString == null) 467 { 468 m_queryString = p_base.getQueryString(); 469 } 470 471 return; 472 } 473 474 // check for scheme - RFC 2396 5.2 #3 475 // if we found a scheme, it means absolute URI, so we're done 476 if (m_scheme == null) 477 { 478 m_scheme = p_base.getScheme(); 479 } 480 481 // check for authority - RFC 2396 5.2 #4 482 // if we found a host, then we've got a network path, so we're done 483 if (m_host == null) 484 { 485 m_userinfo = p_base.getUserinfo(); 486 m_host = p_base.getHost(); 487 m_port = p_base.getPort(); 488 } 489 else 490 { 491 return; 492 } 493 494 // check for absolute path - RFC 2396 5.2 #5 495 if (m_path.length() > 0 && m_path.startsWith("/")) 496 { 497 return; 498 } 499 500 // if we get to this point, we need to resolve relative path 501 // RFC 2396 5.2 #6 502 String path = new String(); 503 String basePath = p_base.getPath(); 504 505 // 6a - get all but the last segment of the base URI path 506 if (basePath != null) 507 { 508 int lastSlash = basePath.lastIndexOf('/'); 509 510 if (lastSlash != -1) 511 { 512 path = basePath.substring(0, lastSlash + 1); 513 } 514 } 515 516 // 6b - append the relative URI path 517 path = path.concat(m_path); 518 519 // 6c - remove all "./" where "." is a complete path segment 520 index = -1; 521 522 while ((index = path.indexOf("/./")) != -1) 523 { 524 path = path.substring(0, index + 1).concat(path.substring(index + 3)); 525 } 526 527 // 6d - remove "." if path ends with "." 
as a complete path segment 528 if (path.endsWith("/.")) 529 { 530 path = path.substring(0, path.length() - 1); 531 } 532 533 // 6e - remove all "<segment>/../" where "<segment>" is a complete 534 // path segment not equal to ".." 535 index = -1; 536 537 int segIndex = -1; 538 String tempString = null; 539 540 while ((index = path.indexOf("/../")) > 0) 541 { 542 tempString = path.substring(0, path.indexOf("/../")); 543 segIndex = tempString.lastIndexOf('/'); 544 545 if (segIndex != -1) 546 { 547 if (!tempString.substring(segIndex++).equals("..")) 548 { 549 path = path.substring(0, segIndex).concat(path.substring(index 550 + 4)); 551 } 552 } 553 } 554 555 // 6f - remove ending "<segment>/.." where "<segment>" is a 556 // complete path segment 557 if (path.endsWith("/..")) 558 { 559 tempString = path.substring(0, path.length() - 3); 560 segIndex = tempString.lastIndexOf('/'); 561 562 if (segIndex != -1) 563 { 564 path = path.substring(0, segIndex + 1); 565 } 566 } 567 568 m_path = path; 569 } 570 } 571 572 /** 573 * Initialize the scheme for this URI from a URI string spec. 574 * 575 * @param p_uriSpec the URI specification (cannot be null) 576 * 577 * @throws MalformedURIException if URI does not have a conformant 578 * scheme 579 */ 580 private void initializeScheme(String p_uriSpec) throws MalformedURIException 581 { 582 583 int uriSpecLen = p_uriSpec.length(); 584 int index = 0; 585 String scheme = null; 586 char testChar = '\0'; 587 588 while (index < uriSpecLen) 589 { 590 testChar = p_uriSpec.charAt(index); 591 592 if (testChar == ':' || testChar == '/' || testChar == '?' 
593 || testChar == '#') 594 { 595 break; 596 } 597 598 index++; 599 } 600 601 scheme = p_uriSpec.substring(0, index); 602 603 if (scheme.length() == 0) 604 { 605 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_NO_SCHEME_INURI, null)); //"No scheme found in URI."); 606 } 607 else 608 { 609 setScheme(scheme); 610 } 611 } 612 613 /** 614 * Initialize the authority (userinfo, host and port) for this 615 * URI from a URI string spec. 616 * 617 * @param p_uriSpec the URI specification (cannot be null) 618 * 619 * @throws MalformedURIException if p_uriSpec violates syntax rules 620 */ 621 private void initializeAuthority(String p_uriSpec) 622 throws MalformedURIException 623 { 624 625 int index = 0; 626 int start = 0; 627 int end = p_uriSpec.length(); 628 char testChar = '\0'; 629 String userinfo = null; 630 631 // userinfo is everything up @ 632 if (p_uriSpec.indexOf('@', start) != -1) 633 { 634 while (index < end) 635 { 636 testChar = p_uriSpec.charAt(index); 637 638 if (testChar == '@') 639 { 640 break; 641 } 642 643 index++; 644 } 645 646 userinfo = p_uriSpec.substring(start, index); 647 648 index++; 649 } 650 651 // host is everything up to ':' 652 String host = null; 653 654 start = index; 655 656 while (index < end) 657 { 658 testChar = p_uriSpec.charAt(index); 659 660 if (testChar == ':') 661 { 662 break; 663 } 664 665 index++; 666 } 667 668 host = p_uriSpec.substring(start, index); 669 670 int port = -1; 671 672 if (host.length() > 0) 673 { 674 675 // port 676 if (testChar == ':') 677 { 678 index++; 679 680 start = index; 681 682 while (index < end) 683 { 684 index++; 685 } 686 687 String portStr = p_uriSpec.substring(start, index); 688 689 if (portStr.length() > 0) 690 { 691 for (int i = 0; i < portStr.length(); i++) 692 { 693 if (!isDigit(portStr.charAt(i))) 694 { 695 throw new MalformedURIException( 696 portStr + " is invalid. 
Port should only contain digits!"); 697 } 698 } 699 700 try 701 { 702 port = Integer.parseInt(portStr); 703 } 704 catch (NumberFormatException nfe) 705 { 706 707 // can't happen 708 } 709 } 710 } 711 } 712 713 setHost(host); 714 setPort(port); 715 setUserinfo(userinfo); 716 } 717 718 /** 719 * Initialize the path for this URI from a URI string spec. 720 * 721 * @param p_uriSpec the URI specification (cannot be null) 722 * 723 * @throws MalformedURIException if p_uriSpec violates syntax rules 724 */ 725 private void initializePath(String p_uriSpec) throws MalformedURIException 726 { 727 728 if (p_uriSpec == null) 729 { 730 throw new MalformedURIException( 731 "Cannot initialize path from null string!"); 732 } 733 734 int index = 0; 735 int start = 0; 736 int end = p_uriSpec.length(); 737 char testChar = '\0'; 738 739 // path - everything up to query string or fragment 740 while (index < end) 741 { 742 testChar = p_uriSpec.charAt(index); 743 744 if (testChar == '?' || testChar == '#') 745 { 746 break; 747 } 748 749 // check for valid escape sequence 750 if (testChar == '%') 751 { 752 if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1)) 753 ||!isHex(p_uriSpec.charAt(index + 2))) 754 { 755 throw new MalformedURIException( 756 Utils.messages.createMessage(MsgKey.ER_PATH_CONTAINS_INVALID_ESCAPE_SEQUENCE, null)); //"Path contains invalid escape sequence!"); 757 } 758 } 759 else if (!isReservedCharacter(testChar) 760 &&!isUnreservedCharacter(testChar)) 761 { 762 if ('\\' != testChar) 763 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_PATH_INVALID_CHAR, new Object[]{String.valueOf(testChar)})); //"Path contains invalid character: " 764 //+ testChar); 765 } 766 767 index++; 768 } 769 770 m_path = p_uriSpec.substring(start, index); 771 772 // query - starts with ? 
and up to fragment or end 773 if (testChar == '?') 774 { 775 index++; 776 777 start = index; 778 779 while (index < end) 780 { 781 testChar = p_uriSpec.charAt(index); 782 783 if (testChar == '#') 784 { 785 break; 786 } 787 788 if (testChar == '%') 789 { 790 if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1)) 791 ||!isHex(p_uriSpec.charAt(index + 2))) 792 { 793 throw new MalformedURIException( 794 "Query string contains invalid escape sequence!"); 795 } 796 } 797 else if (!isReservedCharacter(testChar) 798 &&!isUnreservedCharacter(testChar)) 799 { 800 throw new MalformedURIException( 801 "Query string contains invalid character:" + testChar); 802 } 803 804 index++; 805 } 806 807 m_queryString = p_uriSpec.substring(start, index); 808 } 809 810 // fragment - starts with # 811 if (testChar == '#') 812 { 813 index++; 814 815 start = index; 816 817 while (index < end) 818 { 819 testChar = p_uriSpec.charAt(index); 820 821 if (testChar == '%') 822 { 823 if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1)) 824 ||!isHex(p_uriSpec.charAt(index + 2))) 825 { 826 throw new MalformedURIException( 827 "Fragment contains invalid escape sequence!"); 828 } 829 } 830 else if (!isReservedCharacter(testChar) 831 &&!isUnreservedCharacter(testChar)) 832 { 833 throw new MalformedURIException( 834 "Fragment contains invalid character:" + testChar); 835 } 836 837 index++; 838 } 839 840 m_fragment = p_uriSpec.substring(start, index); 841 } 842 } 843 844 /** 845 * Get the scheme for this URI. 846 * 847 * @return the scheme for this URI 848 */ 849 public String getScheme() 850 { 851 return m_scheme; 852 } 853 854 /** 855 * Get the scheme-specific part for this URI (everything following the 856 * scheme and the first colon). See RFC 2396 Section 5.2 for spec. 
857 * 858 * @return the scheme-specific part for this URI 859 */ 860 public String getSchemeSpecificPart() 861 { 862 863 StringBuffer schemespec = new StringBuffer(); 864 865 if (m_userinfo != null || m_host != null || m_port != -1) 866 { 867 schemespec.append("//"); 868 } 869 870 if (m_userinfo != null) 871 { 872 schemespec.append(m_userinfo); 873 schemespec.append('@'); 874 } 875 876 if (m_host != null) 877 { 878 schemespec.append(m_host); 879 } 880 881 if (m_port != -1) 882 { 883 schemespec.append(':'); 884 schemespec.append(m_port); 885 } 886 887 if (m_path != null) 888 { 889 schemespec.append((m_path)); 890 } 891 892 if (m_queryString != null) 893 { 894 schemespec.append('?'); 895 schemespec.append(m_queryString); 896 } 897 898 if (m_fragment != null) 899 { 900 schemespec.append('#'); 901 schemespec.append(m_fragment); 902 } 903 904 return schemespec.toString(); 905 } 906 907 /** 908 * Get the userinfo for this URI. 909 * 910 * @return the userinfo for this URI (null if not specified). 911 */ 912 public String getUserinfo() 913 { 914 return m_userinfo; 915 } 916 917 /** 918 * Get the host for this URI. 919 * 920 * @return the host for this URI (null if not specified). 921 */ 922 public String getHost() 923 { 924 return m_host; 925 } 926 927 /** 928 * Get the port for this URI. 929 * 930 * @return the port for this URI (-1 if not specified). 931 */ 932 public int getPort() 933 { 934 return m_port; 935 } 936 937 /** 938 * Get the path for this URI (optionally with the query string and 939 * fragment). 940 * 941 * @param p_includeQueryString if true (and query string is not null), 942 * then a "?" 
followed by the query string 943 * will be appended 944 * @param p_includeFragment if true (and fragment is not null), 945 * then a "#" followed by the fragment 946 * will be appended 947 * 948 * @return the path for this URI possibly including the query string 949 * and fragment 950 */ 951 public String getPath(boolean p_includeQueryString, 952 boolean p_includeFragment) 953 { 954 955 StringBuffer pathString = new StringBuffer(m_path); 956 957 if (p_includeQueryString && m_queryString != null) 958 { 959 pathString.append('?'); 960 pathString.append(m_queryString); 961 } 962 963 if (p_includeFragment && m_fragment != null) 964 { 965 pathString.append('#'); 966 pathString.append(m_fragment); 967 } 968 969 return pathString.toString(); 970 } 971 972 /** 973 * Get the path for this URI. Note that the value returned is the path 974 * only and does not include the query string or fragment. 975 * 976 * @return the path for this URI. 977 */ 978 public String getPath() 979 { 980 return m_path; 981 } 982 983 /** 984 * Get the query string for this URI. 985 * 986 * @return the query string for this URI. Null is returned if there 987 * was no "?" in the URI spec, empty string if there was a 988 * "?" but no query string following it. 989 */ 990 public String getQueryString() 991 { 992 return m_queryString; 993 } 994 995 /** 996 * Get the fragment for this URI. 997 * 998 * @return the fragment for this URI. Null is returned if there 999 * was no "#" in the URI spec, empty string if there was a 1000 * "#" but no fragment following it. 1001 */ 1002 public String getFragment() 1003 { 1004 return m_fragment; 1005 } 1006 1007 /** 1008 * Set the scheme for this URI. The scheme is converted to lowercase 1009 * before it is set. 
1010 * 1011 * @param p_scheme the scheme for this URI (cannot be null) 1012 * 1013 * @throws MalformedURIException if p_scheme is not a conformant 1014 * scheme name 1015 */ 1016 public void setScheme(String p_scheme) throws MalformedURIException 1017 { 1018 1019 if (p_scheme == null) 1020 { 1021 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_FROM_NULL_STRING, null)); //"Cannot set scheme from null string!"); 1022 } 1023 1024 if (!isConformantSchemeName(p_scheme)) 1025 { 1026 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_NOT_CONFORMANT, null)); //"The scheme is not conformant."); 1027 } 1028 1029 m_scheme = p_scheme.toLowerCase(); 1030 } 1031 1032 /** 1033 * Set the userinfo for this URI. If a non-null value is passed in and 1034 * the host value is null, then an exception is thrown. 1035 * 1036 * @param p_userinfo the userinfo for this URI 1037 * 1038 * @throws MalformedURIException if p_userinfo contains invalid 1039 * characters 1040 */ 1041 public void setUserinfo(String p_userinfo) throws MalformedURIException 1042 { 1043 1044 if (p_userinfo == null) 1045 { 1046 m_userinfo = null; 1047 } 1048 else 1049 { 1050 if (m_host == null) 1051 { 1052 throw new MalformedURIException( 1053 "Userinfo cannot be set when host is null!"); 1054 } 1055 1056 // userinfo can contain alphanumerics, mark characters, escaped 1057 // and ';',':','&','=','+','$',',' 1058 int index = 0; 1059 int end = p_userinfo.length(); 1060 char testChar = '\0'; 1061 1062 while (index < end) 1063 { 1064 testChar = p_userinfo.charAt(index); 1065 1066 if (testChar == '%') 1067 { 1068 if (index + 2 >= end ||!isHex(p_userinfo.charAt(index + 1)) 1069 ||!isHex(p_userinfo.charAt(index + 2))) 1070 { 1071 throw new MalformedURIException( 1072 "Userinfo contains invalid escape sequence!"); 1073 } 1074 } 1075 else if (!isUnreservedCharacter(testChar) 1076 && USERINFO_CHARACTERS.indexOf(testChar) == -1) 1077 { 1078 throw new MalformedURIException( 
1079 "Userinfo contains invalid character:" + testChar); 1080 } 1081 1082 index++; 1083 } 1084 } 1085 1086 m_userinfo = p_userinfo; 1087 } 1088 1089 /** 1090 * Set the host for this URI. If null is passed in, the userinfo 1091 * field is also set to null and the port is set to -1. 1092 * 1093 * @param p_host the host for this URI 1094 * 1095 * @throws MalformedURIException if p_host is not a valid IP 1096 * address or DNS hostname. 1097 */ 1098 public void setHost(String p_host) throws MalformedURIException 1099 { 1100 1101 if (p_host == null || p_host.trim().length() == 0) 1102 { 1103 m_host = p_host; 1104 m_userinfo = null; 1105 m_port = -1; 1106 } 1107 else if (!isWellFormedAddress(p_host)) 1108 { 1109 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_HOST_ADDRESS_NOT_WELLFORMED, null)); //"Host is not a well formed address!"); 1110 } 1111 1112 m_host = p_host; 1113 } 1114 1115 /** 1116 * Set the port for this URI. -1 is used to indicate that the port is 1117 * not specified, otherwise valid port numbers are between 0 and 65535. 1118 * If a valid port number is passed in and the host field is null, 1119 * an exception is thrown. 1120 * 1121 * @param p_port the port number for this URI 1122 * 1123 * @throws MalformedURIException if p_port is not -1 and not a 1124 * valid port number 1125 */ 1126 public void setPort(int p_port) throws MalformedURIException 1127 { 1128 1129 if (p_port >= 0 && p_port <= 65535) 1130 { 1131 if (m_host == null) 1132 { 1133 throw new MalformedURIException( 1134 Utils.messages.createMessage(MsgKey.ER_PORT_WHEN_HOST_NULL, null)); //"Port cannot be set when host is null!"); 1135 } 1136 } 1137 else if (p_port != -1) 1138 { 1139 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_INVALID_PORT, null)); //"Invalid port number!"); 1140 } 1141 1142 m_port = p_port; 1143 } 1144 1145 /** 1146 * Set the path for this URI. 
If the supplied path is null, then the 1147 * query string and fragment are set to null as well. If the supplied 1148 * path includes a query string and/or fragment, these fields will be 1149 * parsed and set as well. Note that, for URIs following the "generic 1150 * URI" syntax, the path specified should start with a slash. 1151 * For URIs that do not follow the generic URI syntax, this method 1152 * sets the scheme-specific part. 1153 * 1154 * @param p_path the path for this URI (may be null) 1155 * 1156 * @throws MalformedURIException if p_path contains invalid 1157 * characters 1158 */ 1159 public void setPath(String p_path) throws MalformedURIException 1160 { 1161 1162 if (p_path == null) 1163 { 1164 m_path = null; 1165 m_queryString = null; 1166 m_fragment = null; 1167 } 1168 else 1169 { 1170 initializePath(p_path); 1171 } 1172 } 1173 1174 /** 1175 * Append to the end of the path of this URI. If the current path does 1176 * not end in a slash and the path to be appended does not begin with 1177 * a slash, a slash will be appended to the current path before the 1178 * new segment is added. Also, if the current path ends in a slash 1179 * and the new segment begins with a slash, the extra slash will be 1180 * removed before the new segment is appended. 
1181 * 1182 * @param p_addToPath the new segment to be added to the current path 1183 * 1184 * @throws MalformedURIException if p_addToPath contains syntax 1185 * errors 1186 */ 1187 public void appendPath(String p_addToPath) throws MalformedURIException 1188 { 1189 1190 if (p_addToPath == null || p_addToPath.trim().length() == 0) 1191 { 1192 return; 1193 } 1194 1195 if (!isURIString(p_addToPath)) 1196 { 1197 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_PATH_INVALID_CHAR, new Object[]{p_addToPath})); //"Path contains invalid character!"); 1198 } 1199 1200 if (m_path == null || m_path.trim().length() == 0) 1201 { 1202 if (p_addToPath.startsWith("/")) 1203 { 1204 m_path = p_addToPath; 1205 } 1206 else 1207 { 1208 m_path = "/" + p_addToPath; 1209 } 1210 } 1211 else if (m_path.endsWith("/")) 1212 { 1213 if (p_addToPath.startsWith("/")) 1214 { 1215 m_path = m_path.concat(p_addToPath.substring(1)); 1216 } 1217 else 1218 { 1219 m_path = m_path.concat(p_addToPath); 1220 } 1221 } 1222 else 1223 { 1224 if (p_addToPath.startsWith("/")) 1225 { 1226 m_path = m_path.concat(p_addToPath); 1227 } 1228 else 1229 { 1230 m_path = m_path.concat("/" + p_addToPath); 1231 } 1232 } 1233 } 1234 1235 /** 1236 * Set the query string for this URI. A non-null value is valid only 1237 * if this is an URI conforming to the generic URI syntax and 1238 * the path value is not null. 
1239 * 1240 * @param p_queryString the query string for this URI 1241 * 1242 * @throws MalformedURIException if p_queryString is not null and this 1243 * URI does not conform to the generic 1244 * URI syntax or if the path is null 1245 */ 1246 public void setQueryString(String p_queryString) 1247 throws MalformedURIException 1248 { 1249 1250 if (p_queryString == null) 1251 { 1252 m_queryString = null; 1253 } 1254 else if (!isGenericURI()) 1255 { 1256 throw new MalformedURIException( 1257 "Query string can only be set for a generic URI!"); 1258 } 1259 else if (getPath() == null) 1260 { 1261 throw new MalformedURIException( 1262 "Query string cannot be set when path is null!"); 1263 } 1264 else if (!isURIString(p_queryString)) 1265 { 1266 throw new MalformedURIException( 1267 "Query string contains invalid character!"); 1268 } 1269 else 1270 { 1271 m_queryString = p_queryString; 1272 } 1273 } 1274 1275 /** 1276 * Set the fragment for this URI. A non-null value is valid only 1277 * if this is a URI conforming to the generic URI syntax and 1278 * the path value is not null. 
1279 * 1280 * @param p_fragment the fragment for this URI 1281 * 1282 * @throws MalformedURIException if p_fragment is not null and this 1283 * URI does not conform to the generic 1284 * URI syntax or if the path is null 1285 */ 1286 public void setFragment(String p_fragment) throws MalformedURIException 1287 { 1288 1289 if (p_fragment == null) 1290 { 1291 m_fragment = null; 1292 } 1293 else if (!isGenericURI()) 1294 { 1295 throw new MalformedURIException( 1296 Utils.messages.createMessage(MsgKey.ER_FRAG_FOR_GENERIC_URI, null)); //"Fragment can only be set for a generic URI!"); 1297 } 1298 else if (getPath() == null) 1299 { 1300 throw new MalformedURIException( 1301 Utils.messages.createMessage(MsgKey.ER_FRAG_WHEN_PATH_NULL, null)); //"Fragment cannot be set when path is null!"); 1302 } 1303 else if (!isURIString(p_fragment)) 1304 { 1305 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_FRAG_INVALID_CHAR, null)); //"Fragment contains invalid character!"); 1306 } 1307 else 1308 { 1309 m_fragment = p_fragment; 1310 } 1311 } 1312 1313 /** 1314 * Determines if the passed-in Object is equivalent to this URI. 1315 * 1316 * @param p_test the Object to test for equality. 
1317 * 1318 * @return true if p_test is a URI with all values equal to this 1319 * URI, false otherwise 1320 */ 1321 public boolean equals(Object p_test) 1322 { 1323 1324 if (p_test instanceof URI) 1325 { 1326 URI testURI = (URI) p_test; 1327 1328 if (((m_scheme == null && testURI.m_scheme == null) || (m_scheme != null && testURI.m_scheme != null && m_scheme.equals( 1329 testURI.m_scheme))) && ((m_userinfo == null && testURI.m_userinfo == null) || (m_userinfo != null && testURI.m_userinfo != null && m_userinfo.equals( 1330 testURI.m_userinfo))) && ((m_host == null && testURI.m_host == null) || (m_host != null && testURI.m_host != null && m_host.equals( 1331 testURI.m_host))) && m_port == testURI.m_port && ((m_path == null && testURI.m_path == null) || (m_path != null && testURI.m_path != null && m_path.equals( 1332 testURI.m_path))) && ((m_queryString == null && testURI.m_queryString == null) || (m_queryString != null && testURI.m_queryString != null && m_queryString.equals( 1333 testURI.m_queryString))) && ((m_fragment == null && testURI.m_fragment == null) || (m_fragment != null && testURI.m_fragment != null && m_fragment.equals( 1334 testURI.m_fragment)))) 1335 { 1336 return true; 1337 } 1338 } 1339 1340 return false; 1341 } 1342 1343 /** 1344 * Get the URI as a string specification. See RFC 2396 Section 5.2. 1345 * 1346 * @return the URI string specification 1347 */ 1348 public String toString() 1349 { 1350 1351 StringBuffer uriSpecString = new StringBuffer(); 1352 1353 if (m_scheme != null) 1354 { 1355 uriSpecString.append(m_scheme); 1356 uriSpecString.append(':'); 1357 } 1358 1359 uriSpecString.append(getSchemeSpecificPart()); 1360 1361 return uriSpecString.toString(); 1362 } 1363 1364 /** 1365 * Get the indicator as to whether this URI uses the "generic URI" 1366 * syntax. 
1367 * 1368 * @return true if this URI uses the "generic URI" syntax, false 1369 * otherwise 1370 */ 1371 public boolean isGenericURI() 1372 { 1373 1374 // presence of the host (whether valid or empty) means 1375 // double-slashes which means generic uri 1376 return (m_host != null); 1377 } 1378 1379 /** 1380 * Determine whether a scheme conforms to the rules for a scheme name. 1381 * A scheme is conformant if it starts with an alphanumeric, and 1382 * contains only alphanumerics, '+','-' and '.'. 1383 * 1384 * 1385 * @param p_scheme The sheme name to check 1386 * @return true if the scheme is conformant, false otherwise 1387 */ 1388 public static boolean isConformantSchemeName(String p_scheme) 1389 { 1390 1391 if (p_scheme == null || p_scheme.trim().length() == 0) 1392 { 1393 return false; 1394 } 1395 1396 if (!isAlpha(p_scheme.charAt(0))) 1397 { 1398 return false; 1399 } 1400 1401 char testChar; 1402 1403 for (int i = 1; i < p_scheme.length(); i++) 1404 { 1405 testChar = p_scheme.charAt(i); 1406 1407 if (!isAlphanum(testChar) && SCHEME_CHARACTERS.indexOf(testChar) == -1) 1408 { 1409 return false; 1410 } 1411 } 1412 1413 return true; 1414 } 1415 1416 /** 1417 * Determine whether a string is syntactically capable of representing 1418 * a valid IPv4 address or the domain name of a network host. A valid 1419 * IPv4 address consists of four decimal digit groups separated by a 1420 * '.'. A hostname consists of domain labels (each of which must 1421 * begin and end with an alphanumeric but may contain '-') separated 1422 * & by a '.'. See RFC 2396 Section 3.2.2. 
1423 * 1424 * 1425 * @param p_address The address string to check 1426 * @return true if the string is a syntactically valid IPv4 address 1427 * or hostname 1428 */ 1429 public static boolean isWellFormedAddress(String p_address) 1430 { 1431 1432 if (p_address == null) 1433 { 1434 return false; 1435 } 1436 1437 String address = p_address.trim(); 1438 int addrLength = address.length(); 1439 1440 if (addrLength == 0 || addrLength > 255) 1441 { 1442 return false; 1443 } 1444 1445 if (address.startsWith(".") || address.startsWith("-")) 1446 { 1447 return false; 1448 } 1449 1450 // rightmost domain label starting with digit indicates IP address 1451 // since top level domain label can only start with an alpha 1452 // see RFC 2396 Section 3.2.2 1453 int index = address.lastIndexOf('.'); 1454 1455 if (address.endsWith(".")) 1456 { 1457 index = address.substring(0, index).lastIndexOf('.'); 1458 } 1459 1460 if (index + 1 < addrLength && isDigit(p_address.charAt(index + 1))) 1461 { 1462 char testChar; 1463 int numDots = 0; 1464 1465 // make sure that 1) we see only digits and dot separators, 2) that 1466 // any dot separator is preceded and followed by a digit and 1467 // 3) that we find 3 dots 1468 for (int i = 0; i < addrLength; i++) 1469 { 1470 testChar = address.charAt(i); 1471 1472 if (testChar == '.') 1473 { 1474 if (!isDigit(address.charAt(i - 1)) 1475 || (i + 1 < addrLength &&!isDigit(address.charAt(i + 1)))) 1476 { 1477 return false; 1478 } 1479 1480 numDots++; 1481 } 1482 else if (!isDigit(testChar)) 1483 { 1484 return false; 1485 } 1486 } 1487 1488 if (numDots != 3) 1489 { 1490 return false; 1491 } 1492 } 1493 else 1494 { 1495 1496 // domain labels can contain alphanumerics and '-" 1497 // but must start and end with an alphanumeric 1498 char testChar; 1499 1500 for (int i = 0; i < addrLength; i++) 1501 { 1502 testChar = address.charAt(i); 1503 1504 if (testChar == '.') 1505 { 1506 if (!isAlphanum(address.charAt(i - 1))) 1507 { 1508 return false; 1509 } 1510 1511 
if (i + 1 < addrLength &&!isAlphanum(address.charAt(i + 1))) 1512 { 1513 return false; 1514 } 1515 } 1516 else if (!isAlphanum(testChar) && testChar != '-') 1517 { 1518 return false; 1519 } 1520 } 1521 } 1522 1523 return true; 1524 } 1525 1526 /** 1527 * Determine whether a char is a digit. 1528 * 1529 * 1530 * @param p_char the character to check 1531 * @return true if the char is betweeen '0' and '9', false otherwise 1532 */ 1533 private static boolean isDigit(char p_char) 1534 { 1535 return p_char >= '0' && p_char <= '9'; 1536 } 1537 1538 /** 1539 * Determine whether a character is a hexadecimal character. 1540 * 1541 * 1542 * @param p_char the character to check 1543 * @return true if the char is betweeen '0' and '9', 'a' and 'f' 1544 * or 'A' and 'F', false otherwise 1545 */ 1546 private static boolean isHex(char p_char) 1547 { 1548 return (isDigit(p_char) || (p_char >= 'a' && p_char <= 'f') 1549 || (p_char >= 'A' && p_char <= 'F')); 1550 } 1551 1552 /** 1553 * Determine whether a char is an alphabetic character: a-z or A-Z 1554 * 1555 * 1556 * @param p_char the character to check 1557 * @return true if the char is alphabetic, false otherwise 1558 */ 1559 private static boolean isAlpha(char p_char) 1560 { 1561 return ((p_char >= 'a' && p_char <= 'z') 1562 || (p_char >= 'A' && p_char <= 'Z')); 1563 } 1564 1565 /** 1566 * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z 1567 * 1568 * 1569 * @param p_char the character to check 1570 * @return true if the char is alphanumeric, false otherwise 1571 */ 1572 private static boolean isAlphanum(char p_char) 1573 { 1574 return (isAlpha(p_char) || isDigit(p_char)); 1575 } 1576 1577 /** 1578 * Determine whether a character is a reserved character: 1579 * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ',' 1580 * 1581 * 1582 * @param p_char the character to check 1583 * @return true if the string contains any reserved characters 1584 */ 1585 private static boolean isReservedCharacter(char p_char) 1586 { 1587 
return RESERVED_CHARACTERS.indexOf(p_char) != -1; 1588 } 1589 1590 /** 1591 * Determine whether a char is an unreserved character. 1592 * 1593 * 1594 * @param p_char the character to check 1595 * @return true if the char is unreserved, false otherwise 1596 */ 1597 private static boolean isUnreservedCharacter(char p_char) 1598 { 1599 return (isAlphanum(p_char) || MARK_CHARACTERS.indexOf(p_char) != -1); 1600 } 1601 1602 /** 1603 * Determine whether a given string contains only URI characters (also 1604 * called "uric" in RFC 2396). uric consist of all reserved 1605 * characters, unreserved characters and escaped characters. 1606 * 1607 * 1608 * @param p_uric URI string 1609 * @return true if the string is comprised of uric, false otherwise 1610 */ 1611 private static boolean isURIString(String p_uric) 1612 { 1613 1614 if (p_uric == null) 1615 { 1616 return false; 1617 } 1618 1619 int end = p_uric.length(); 1620 char testChar = '\0'; 1621 1622 for (int i = 0; i < end; i++) 1623 { 1624 testChar = p_uric.charAt(i); 1625 1626 if (testChar == '%') 1627 { 1628 if (i + 2 >= end ||!isHex(p_uric.charAt(i + 1)) 1629 ||!isHex(p_uric.charAt(i + 2))) 1630 { 1631 return false; 1632 } 1633 else 1634 { 1635 i += 2; 1636 1637 continue; 1638 } 1639 } 1640 1641 if (isReservedCharacter(testChar) || isUnreservedCharacter(testChar)) 1642 { 1643 continue; 1644 } 1645 else 1646 { 1647 return false; 1648 } 1649 } 1650 1651 return true; 1652 } 1653 }