- /*
- * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/URI.java,v 1.47 2004/05/13 04:03:25 mbecke Exp $
- * $Revision: 1.47 $
- * $Date: 2004/05/13 04:03:25 $
- *
- * ====================================================================
- *
- * Copyright 2002-2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- *
- */
- package org.apache.commons.httpclient;
- import java.io.IOException;
- import java.io.ObjectInputStream;
- import java.io.ObjectOutputStream;
- import java.io.Serializable;
- import java.util.Locale;
- import java.util.BitSet;
- import java.util.Hashtable;
- import org.apache.commons.codec.DecoderException;
- import org.apache.commons.codec.net.URLCodec;
- import org.apache.commons.httpclient.util.EncodingUtil;
- /**
- * The interface for the URI (Uniform Resource Identifier) version of RFC 2396.
- * This class supports parsing a URI reference so that it can be extended for
- * any specific protocol, taking into account the character encoding of the
- * protocol to be transported and the charset of the document.
- * <p>
- * A URI is always in an "escaped" form, since escaping or unescaping a
- * completed URI might change its semantics.
- * <p>
- * Implementers should be careful not to escape or unescape the same string
- * more than once, since unescaping an already unescaped string might lead to
- * misinterpreting a percent data character as another escaped character,
- * or vice versa in the case of escaping an already escaped string.
- * <p>
- * In order to avoid these problems, the following data types are used:
- * <p><blockquote><pre>
- * URI character sequence: char
- * octet sequence: byte
- * original character sequence: String
- * </pre></blockquote><p>
- *
- * So, a URI is a sequence of characters as an array of a char type, which
- * is not always represented as a sequence of octets as an array of byte.
- * <p>
- *
- * URI Syntactic Components
- * <p><blockquote><pre>
- * - In general, written as follows:
- * Absolute URI = <scheme>:<scheme-specific-part>
- * Generic URI = <scheme>://<authority><path>?<query>
- *
- * - Syntax
- * absoluteURI = scheme ":" ( hier_part | opaque_part )
- * hier_part = ( net_path | abs_path ) [ "?" query ]
- * net_path = "//" authority [ abs_path ]
- * abs_path = "/" path_segments
- * </pre></blockquote><p>
- *
- * The following examples illustrate URI that are in common use.
- * <pre>
- * ftp://ftp.is.co.za/rfc/rfc1808.txt
- * -- ftp scheme for File Transfer Protocol services
- * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
- * -- gopher scheme for Gopher and Gopher+ Protocol services
- * http://www.math.uio.no/faq/compression-faq/part1.html
- * -- http scheme for Hypertext Transfer Protocol services
- * mailto:mduerst@ifi.unizh.ch
- * -- mailto scheme for electronic mail addresses
- * news:comp.infosystems.www.servers.unix
- * -- news scheme for USENET news groups and articles
- * telnet://melvyl.ucop.edu/
- * -- telnet scheme for interactive services via the TELNET Protocol
- * </pre>
- * Please note that there are many modifications from URL (RFC 1738) and
- * relative URL (RFC 1808).
- * <p>
- * <b>The expressions for a URI</b>
- * <p><pre>
- * For escaped URI forms
- * - URI(char[]) // constructor
- * - char[] getRawXxx() // method
- * - String getEscapedXxx() // method
- * - String toString() // method
- * <p>
- * For unescaped URI forms
- * - URI(String) // constructor
- * - String getXXX() // method
- * </pre><p>
- *
- * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
- * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
- * @version $Revision: 1.47 $ $Date: 2002/03/14 15:14:01
- */
- public class URI implements Cloneable, Comparable, Serializable {
- // ----------------------------------------------------------- Constructors
- /**
-  * Creates an empty instance for internal use. No component is parsed or
-  * assigned, so every field keeps its declared initial value.
-  */
- protected URI() {
-     super();
- }
- /**
-  * Builds a URI from a string, recording the given charset as the protocol
-  * charset of the new instance. The text may be supplied either already
-  * escaped or in its original (unescaped) form.
-  *
-  * @param s the URI character sequence
-  * @param escaped <tt>true</tt> if <code>s</code> is already in escaped form,
-  * <tt>false</tt> otherwise
-  * @param charset the charset string to do escape encoding, if required;
-  * it is stored in <code>protocolCharset</code> before the string is parsed
-  *
-  * @throws URIException If the URI cannot be created.
-  * @throws NullPointerException if input string is <code>null</code>
-  *
-  * @see #getProtocolCharset
-  *
-  * @since 3.0
-  */
- public URI(String s, boolean escaped, String charset)
-         throws URIException, NullPointerException {
-     // record the charset first, then parse
-     this.protocolCharset = charset;
-     parseUriReference(s, escaped);
- }
- /**
- * Construct a URI from a string without specifying a charset. The input
- * string can be either in escaped or unescaped form.
- * <p>
- * Unlike {@link #URI(String, boolean, String)}, this constructor does not
- * assign <code>protocolCharset</code>; it only parses the input.
- *
- * @param s URI character sequence
- * @param escaped <tt>true</tt> if URI character sequence is in escaped form.
- * <tt>false</tt> otherwise.
- *
- * @throws URIException If the URI cannot be created.
- * @throws NullPointerException if input string is <code>null</code>
- *
- * @see #getProtocolCharset
- *
- * @since 3.0
- */
- public URI(String s, boolean escaped)
- throws URIException, NullPointerException {
- parseUriReference(s, escaped);
- }
- /**
- * Construct a URI as an escaped form of a character array with the given
- * charset.
- *
- * @param escaped the URI character sequence
- * @param charset the charset string to do escape encoding
- * @throws URIException If the URI cannot be created.
- * @throws NullPointerException if <code>escaped</code> is <code>null</code>
- * @see #getProtocolCharset
- *
- * @deprecated Use #URI(String, boolean, String)
- */
- public URI(char[] escaped, String charset)
- throws URIException, NullPointerException {
- protocolCharset = charset;
- parseUriReference(new String(escaped), true);
- }
- /**
- * Construct a URI as an escaped form of a character array.
- * An URI can be placed within double-quotes or angle brackets like
- * "http://test.com/" and <http://test.com/>
- *
- * @param escaped the URI character sequence
- * @throws URIException If the URI cannot be created.
- * @throws NullPointerException if <code>escaped</code> is <code>null</code>
- * @see #getDefaultProtocolCharset
- *
- * @deprecated Use #URI(String, boolean)
- */
- public URI(char[] escaped)
- throws URIException, NullPointerException {
- parseUriReference(new String(escaped), true);
- }
- /**
-  * Builds a URI from an original (unescaped) string, recording the given
-  * charset as the protocol charset of the new instance.
-  *
-  * @param original the string to be represented to URI character sequence
-  * It is one of absoluteURI and relativeURI.
-  * @param charset the charset string to do escape encoding
-  * @throws URIException If the URI cannot be created.
-  * @see #getProtocolCharset
-  *
-  * @deprecated Use #URI(String, boolean, String)
-  */
- public URI(String original, String charset) throws URIException {
-     this.protocolCharset = charset;
-     // "false": the input is treated as not yet escaped
-     parseUriReference(original, false);
- }
- /**
- * Construct a URI from the given string, which is parsed as an original
- * (not yet escaped) character sequence.
- * <p><blockquote><pre>
- * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
- * </pre></blockquote><p>
- * An URI can be placed within double-quotes or angle brackets like
- * "http://test.com/" and &lt;http://test.com/&gt;
- *
- * @param original the string to be represented to URI character sequence
- * It is one of absoluteURI and relativeURI.
- * @throws URIException If the URI cannot be created.
- * @see #getDefaultProtocolCharset
- *
- * @deprecated Use #URI(String, boolean)
- */
- public URI(String original) throws URIException {
- parseUriReference(original, false);
- }
- /**
-  * Construct an opaque absolute URI from the given components.
-  * <p><blockquote><pre>
-  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
-  * absoluteURI = scheme ":" ( hier_part | opaque_part )
-  * opaque_part = uric_no_slash *uric
-  * </pre></blockquote><p>
-  * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
-  * &lt;fragment&gt;.
-  * <p>
-  * The scheme is lower-cased and validated, the scheme-specific part is
-  * encoded as the opaque part, and the fragment is stored as given (it is
-  * not encoded here).
-  *
-  * @param scheme the scheme string; must not be <code>null</code>
-  * @param schemeSpecificPart scheme_specific_part
-  * @param fragment the fragment string, or <code>null</code> for no fragment
-  * @throws URIException If the scheme is missing or invalid, or the URI
-  * cannot be created.
-  * @see #getDefaultProtocolCharset
-  */
- public URI(String scheme, String schemeSpecificPart, String fragment)
-         throws URIException {
-     // validate and construct the URI character sequence
-     if (scheme == null) {
-         throw new URIException(URIException.PARSING, "scheme required");
-     }
-     // Lower-case with a fixed locale: with the platform default, a locale
-     // such as Turkish maps 'I' to a dotless i and corrupts the scheme.
-     char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray();
-     if (validate(s, URI.scheme)) {
-         _scheme = s; // is_absoluteURI
-     } else {
-         throw new URIException(URIException.PARSING, "incorrect scheme");
-     }
-     _opaque = encode(schemeSpecificPart, allowed_opaque_part,
-         getProtocolCharset());
-     // Set flag
-     _is_opaque_part = true;
-     // the fragment is optional; a null fragment used to throw a
-     // NullPointerException here
-     _fragment = (fragment == null) ? null : fragment.toCharArray();
-     setURI();
- }
- /**
- * Construct a general URI from the given components.
- * <p><blockquote><pre>
- * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
- * absoluteURI = scheme ":" ( hier_part | opaque_part )
- * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
- * hier_part = ( net_path | abs_path ) [ "?" query ]
- * </pre></blockquote><p>
- * It's for absolute URI = <scheme>:<path>?<query>#<
- * fragment> and relative URI = <path>?<query>#<fragment
- * >.
- *
- * @param scheme the scheme string
- * @param authority the authority string
- * @param path the path string
- * @param query the query string
- * @param fragment the fragment string
- * @throws URIException If the new URI cannot be created.
- * @see #getDefaultProtocolCharset
- */
- public URI(String scheme, String authority, String path, String query,
- String fragment) throws URIException {
- // validate and contruct the URI character sequence
- StringBuffer buff = new StringBuffer();
- if (scheme != null) {
- buff.append(scheme);
- buff.append(':');
- }
- if (authority != null) {
- buff.append("//");
- buff.append(authority);
- }
- if (path != null) { // accept empty path
- if ((scheme != null || authority != null)
- && !path.startsWith("/")) {
- throw new URIException(URIException.PARSING,
- "abs_path requested");
- }
- buff.append(path);
- }
- if (query != null) {
- buff.append('?');
- buff.append(query);
- }
- if (fragment != null) {
- buff.append('#');
- buff.append(fragment);
- }
- parseUriReference(buff.toString(), false);
- }
- /**
- * Construct a general URI from the given components.
- * <p>
- * Delegates to
- * {@link #URI(String, String, String, int, String, String, String)} with
- * <code>null</code> for the path, query and fragment.
- *
- * @param scheme the scheme string
- * @param userinfo the userinfo string
- * @param host the host string
- * @param port the port number
- * @throws URIException If the new URI cannot be created.
- * @see #getDefaultProtocolCharset
- */
- public URI(String scheme, String userinfo, String host, int port)
- throws URIException {
- this(scheme, userinfo, host, port, null, null, null);
- }
- /**
- * Construct a general URI from the given components.
- * <p>
- * Delegates to
- * {@link #URI(String, String, String, int, String, String, String)} with
- * <code>null</code> for the query and fragment.
- *
- * @param scheme the scheme string
- * @param userinfo the userinfo string
- * @param host the host string
- * @param port the port number
- * @param path the path string
- * @throws URIException If the new URI cannot be created.
- * @see #getDefaultProtocolCharset
- */
- public URI(String scheme, String userinfo, String host, int port,
- String path) throws URIException {
- this(scheme, userinfo, host, port, path, null, null);
- }
- /**
- * Construct a general URI from the given components.
- * <p>
- * Delegates to
- * {@link #URI(String, String, String, int, String, String, String)} with
- * <code>null</code> for the fragment.
- *
- * @param scheme the scheme string
- * @param userinfo the userinfo string
- * @param host the host string
- * @param port the port number
- * @param path the path string
- * @param query the query string
- * @throws URIException If the new URI cannot be created.
- * @see #getDefaultProtocolCharset
- */
- public URI(String scheme, String userinfo, String host, int port,
- String path, String query) throws URIException {
- this(scheme, userinfo, host, port, path, query, null);
- }
- /**
-  * Construct a general URI from the given components.
-  * <p>
-  * The userinfo, host and port are combined into one authority string of
-  * the form <code>[userinfo@]host[:port]</code>, which is then passed to
-  * the (scheme, authority, path, query, fragment) constructor. When the
-  * host is <code>null</code> the authority is <code>null</code> too, and
-  * the userinfo and port are ignored.
-  *
-  * @param scheme the scheme string
-  * @param userinfo the userinfo string, or <code>null</code> for none
-  * @param host the host string
-  * @param port the port number, or -1 to leave the port out
-  * @param path the path string
-  * @param query the query string
-  * @param fragment the fragment string
-  * @throws URIException If the new URI cannot be created.
-  * @see #getDefaultProtocolCharset
-  */
- public URI(String scheme, String userinfo, String host, int port,
-         String path, String query, String fragment) throws URIException {
-     this(scheme,
-         (host == null)
-             ? null
-             : ((userinfo == null) ? "" : userinfo + '@')
-                 + host
-                 + ((port == -1) ? "" : ":" + port),
-         path, query, fragment);
- }
- /**
- * Construct a general URI from the given components.
- * <p>
- * Delegates to the (scheme, authority, path, query, fragment) constructor,
- * passing <code>host</code> as the authority and <code>null</code> as the
- * query.
- *
- * @param scheme the scheme string
- * @param host the host string
- * @param path the path string
- * @param fragment the fragment string
- * @throws URIException If the new URI cannot be created.
- * @see #getDefaultProtocolCharset
- */
- public URI(String scheme, String host, String path, String fragment)
- throws URIException {
- this(scheme, host, path, null, fragment);
- }
- /**
- * Construct a general URI with the given relative URI string.
- * <p>
- * The relative string is first parsed with the deprecated
- * {@link #URI(String)} constructor (i.e. as unescaped text) and the result
- * is then resolved against <code>base</code>.
- *
- * @param base the base URI
- * @param relative the relative URI string
- * @throws URIException If the new URI cannot be created.
- *
- * @deprecated Use #URI(URI, String, boolean)
- */
- public URI(URI base, String relative) throws URIException {
- this(base, new URI(relative));
- }
- /**
- * Construct a general URI with the given relative URI string.
- * <p>
- * The relative string is first parsed with
- * {@link #URI(String, boolean)} and the result is then resolved against
- * <code>base</code>.
- *
- * @param base the base URI
- * @param relative the relative URI string
- * @param escaped <tt>true</tt> if URI character sequence is in escaped form.
- * <tt>false</tt> otherwise.
- *
- * @throws URIException If the new URI cannot be created.
- *
- * @since 3.0
- */
- public URI(URI base, String relative, boolean escaped) throws URIException {
- this(base, new URI(relative, escaped));
- }
- /**
- * Construct a general URI with the given relative URI.
- * <p><blockquote><pre>
- * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
- * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
- * </pre></blockquote><p>
- * Resolving Relative References to Absolute Form.
- *
- * <strong>Examples of Resolving Relative URI References</strong>
- *
- * Within an object with a well-defined base URI of
- * <p><blockquote><pre>
- * http://a/b/c/d;p?q
- * </pre></blockquote><p>
- * the relative URI would be resolved as follows:
- *
- * Normal Examples
- *
- * <p><blockquote><pre>
- * g:h = g:h
- * g = http://a/b/c/g
- * ./g = http://a/b/c/g
- * g/ = http://a/b/c/g/
- * /g = http://a/g
- * //g = http://g
- * ?y = http://a/b/c/?y
- * g?y = http://a/b/c/g?y
- * #s = (current document)#s
- * g#s = http://a/b/c/g#s
- * g?y#s = http://a/b/c/g?y#s
- * ;x = http://a/b/c/;x
- * g;x = http://a/b/c/g;x
- * g;x?y#s = http://a/b/c/g;x?y#s
- * . = http://a/b/c/
- * ./ = http://a/b/c/
- * .. = http://a/b/
- * ../ = http://a/b/
- * ../g = http://a/b/g
- * ../.. = http://a/
- * ../../ = http://a/
- * ../../g = http://a/g
- * </pre></blockquote><p>
- *
- * Some URI schemes do not allow a hierarchical syntax matching the
- * &lt;hier_part&gt; syntax, and thus cannot use relative references.
- * <p>
- * Implementation outline: the base must have a scheme. If either URI is
- * opaque, the result is built from the base scheme plus the relative
- * opaque part and fragment and the constructor returns early. Otherwise
- * scheme, authority, path, query and fragment are taken from the relative
- * URI where it defines them and from the base where it does not; the
- * assembled URI is finally re-parsed so that all flags are consistent.
- *
- * @param base the base URI; must have a scheme
- * @param relative the relative URI
- * @throws URIException If the base has no scheme or the new URI cannot be
- * created.
- */
- public URI(URI base, URI relative) throws URIException {
- if (base._scheme == null) { // a base without a scheme cannot anchor the resolution
- throw new URIException(URIException.PARSING, "base URI required");
- }
- if (base._scheme != null) { // always true here: the null case threw just above
- this._scheme = base._scheme;
- this._authority = base._authority;
- }
- if (base._is_opaque_part || relative._is_opaque_part) { // opaque URIs are not resolved hierarchically
- this._scheme = base._scheme;
- this._is_opaque_part = base._is_opaque_part
- || relative._is_opaque_part;
- this._opaque = relative._opaque; // NOTE(review): presumably null when only the base is opaque - confirm this is intended
- this._fragment = relative._fragment;
- this.setURI();
- return;
- }
- if (relative._scheme != null) { // the relative URI has its own scheme: take its authority and path as they are
- this._scheme = relative._scheme;
- this._is_net_path = relative._is_net_path;
- this._authority = relative._authority;
- if (relative._is_server) {
- this._is_server = relative._is_server;
- this._userinfo = relative._userinfo;
- this._host = relative._host;
- this._port = relative._port;
- } else if (relative._is_reg_name) {
- this._is_reg_name = relative._is_reg_name;
- }
- this._is_abs_path = relative._is_abs_path;
- this._is_rel_path = relative._is_rel_path;
- this._path = relative._path;
- } else if (base._authority != null && relative._scheme == null) { // the second test is implied by the else branch
- this._is_net_path = base._is_net_path;
- this._authority = base._authority;
- if (base._is_server) {
- this._is_server = base._is_server;
- this._userinfo = base._userinfo;
- this._host = base._host;
- this._port = base._port;
- } else if (base._is_reg_name) {
- this._is_reg_name = base._is_reg_name;
- }
- }
- if (relative._authority != null) { // an authority given by the relative URI replaces whatever was set above
- this._is_net_path = relative._is_net_path;
- this._authority = relative._authority;
- if (relative._is_server) {
- this._is_server = relative._is_server;
- this._userinfo = relative._userinfo;
- this._host = relative._host;
- this._port = relative._port;
- } else if (relative._is_reg_name) {
- this._is_reg_name = relative._is_reg_name;
- }
- this._is_abs_path = relative._is_abs_path;
- this._is_rel_path = relative._is_rel_path;
- this._path = relative._path;
- }
- // resolve the path and query if necessary (only when the relative URI has neither scheme nor authority)
- if (relative._scheme == null && relative._authority == null) {
- if ((relative._path == null || relative._path.length == 0)
- && relative._query == null) {
- // handle a reference to the current document, see RFC 2396
- // section 5.2 step 2
- this._path = base._path;
- this._query = base._query;
- } else {
- this._path = resolvePath(base._path, relative._path);
- }
- }
- // base._query removed: a query on the relative URI always wins
- if (relative._query != null) {
- this._query = relative._query;
- }
- // base._fragment removed: the base fragment is never copied, only the relative one
- if (relative._fragment != null) {
- this._fragment = relative._fragment;
- }
- this.setURI();
- // reparse the newly built URI, this will ensure that all flags are set correctly.
- // TODO there must be a better way to do this
- parseUriReference(new String(_uri), true); // "true": the text in _uri is treated as already escaped
- }
- // --------------------------------------------------- Instance Variables
- /** Version ID for serialization (the class implements Serializable). */
- static final long serialVersionUID = 604752400577948726L;
- /**
- * Cache the hash code for this URI.
- * Starts at 0 for a new instance.
- */
- protected int hash = 0;
- /**
- * This Uniform Resource Identifier (URI).
- * The URI is always in an "escaped" form, since escaping or unescaping
- * a completed URI might change its semantics.
- * <code>null</code> until it is assigned.
- */
- protected char[] _uri = null;
- /**
- * The charset of the protocol used by this URI instance.
- * Assigned by the constructors that take a charset argument; the other
- * constructors do not assign it, so it stays <code>null</code> there.
- */
- protected String protocolCharset = null;
- /**
- * The default charset of the protocol. RFC 2277, 2396
- * This field is static and not final, so it is shared by all instances
- * and can be reassigned.
- */
- protected static String defaultProtocolCharset = "UTF-8";
- /**
- * The default charset of the document. RFC 2277, 2396
- * The platform's charset is used for the document by default.
- */
- protected static String defaultDocumentCharset = null;
- protected static String defaultDocumentCharsetByLocale = null;
- protected static String defaultDocumentCharsetByPlatform = null;
- // Static initializer for defaultDocumentCharset
- static {
- Locale locale = Locale.getDefault();
- // in order to support backward compatiblity
- if (locale != null) {
- defaultDocumentCharsetByLocale =
- LocaleToCharsetMap.getCharset(locale);
- // set the default document charset
- defaultDocumentCharset = defaultDocumentCharsetByLocale;
- }
- // in order to support platform encoding
- try {
- defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
- } catch (SecurityException ignore) {
- }
- if (defaultDocumentCharset == null) {
- // set the default document charset
- defaultDocumentCharset = defaultDocumentCharsetByPlatform;
- }
- }
- /**
- * The scheme, as a character array; <code>null</code> when not set.
- */
- protected char[] _scheme = null;
- /**
- * The opaque part, as a character array; <code>null</code> when not set.
- */
- protected char[] _opaque = null;
- /**
- * The authority, as a character array; <code>null</code> when not set.
- */
- protected char[] _authority = null;
- /**
- * The userinfo, as a character array; <code>null</code> when not set.
- */
- protected char[] _userinfo = null;
- /**
- * The host, as a character array; <code>null</code> when not set.
- */
- protected char[] _host = null;
- /**
- * The port. Initialised to -1, which presumably means "no port given"
- * (the component constructors use -1 that way) - confirm against the parser.
- */
- protected int _port = -1;
- /**
- * The path, as a character array; <code>null</code> when not set.
- */
- protected char[] _path = null;
- /**
- * The query, as a character array; <code>null</code> when not set.
- */
- protected char[] _query = null;
- /**
- * The fragment, as a character array; <code>null</code> when not set.
- */
- protected char[] _fragment = null;
- /**
- * The root path: the single character "/".
- * The array is static and not final, so it is shared and mutable.
- */
- protected static char[] rootPath = { '/' };
- // ---------------------- Generous characters for each component validation
- /**
- * BitSet containing only the percent "%" character.
- * The percent "%" character always has the reserved purpose of being the
- * escape indicator, it must be escaped as "%25" in order to be used as
- * data within a URI.
- */
- protected static final BitSet percent = new BitSet(256);
- // Static initializer for percent: marks the single character '%'
- static {
- percent.set('%');
- }
- /**
-  * BitSet for digit: the ten ASCII decimal digits.
-  * <p><blockquote><pre>
-  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
-  * "8" | "9"
-  * </pre></blockquote><p>
-  */
- protected static final BitSet digit = new BitSet(256);
- // Static initializer for digit
- static {
-     for (char c = '0'; c <= '9'; c++) {
-         digit.set(c);
-     }
- }
- /**
-  * BitSet for alpha: the ASCII letters in both cases.
-  * <p><blockquote><pre>
-  * alpha = lowalpha | upalpha
-  * </pre></blockquote><p>
-  */
- protected static final BitSet alpha = new BitSet(256);
- // Static initializer for alpha
- static {
-     // one pass: mark each lower-case letter together with its upper-case twin
-     for (char lower = 'a'; lower <= 'z'; lower++) {
-         alpha.set(lower);
-         alpha.set(Character.toUpperCase(lower));
-     }
- }
- /**
-  * BitSet for alphanum: the union of alpha and digit.
-  * <p><blockquote><pre>
-  * alphanum = alpha | digit
-  * </pre></blockquote><p>
-  */
- protected static final BitSet alphanum = new BitSet(256);
- // Static initializer for alphanum
- static {
-     alphanum.or(digit);
-     alphanum.or(alpha);
- }
- /**
-  * BitSet for hex: the decimal digits plus the letters a-f in both cases.
-  * <p><blockquote><pre>
-  * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
-  * "a" | "b" | "c" | "d" | "e" | "f"
-  * </pre></blockquote><p>
-  */
- protected static final BitSet hex = new BitSet(256);
- // Static initializer for hex
- static {
-     hex.or(digit);
-     // one pass: mark each lower-case hex letter together with its upper-case twin
-     for (char lower = 'a'; lower <= 'f'; lower++) {
-         hex.set(lower);
-         hex.set(Character.toUpperCase(lower));
-     }
- }
- /**
-  * BitSet for escaped: every character that can occur in an escape
-  * sequence, i.e. the percent sign and the hex digits. It is a character
-  * class only and does not check the "%" hex hex sequence itself.
-  * <p><blockquote><pre>
-  * escaped = "%" hex hex
-  * </pre></blockquote><p>
-  */
- protected static final BitSet escaped = new BitSet(256);
- // Static initializer for escaped
- static {
-     escaped.or(hex);
-     escaped.or(percent);
- }
- /**
-  * BitSet for mark: the punctuation characters allowed as unreserved data.
-  * <p><blockquote><pre>
-  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
-  * "(" | ")"
-  * </pre></blockquote><p>
-  */
- protected static final BitSet mark = new BitSet(256);
- // Static initializer for mark
- static {
-     final String marks = "-_.!~*'()";
-     for (int i = 0; i < marks.length(); i++) {
-         mark.set(marks.charAt(i));
-     }
- }
- /**
-  * BitSet for unreserved: data characters that are allowed in a URI but
-  * do not have a reserved purpose.
-  * <p><blockquote><pre>
-  * unreserved = alphanum | mark
-  * </pre></blockquote><p>
-  */
- protected static final BitSet unreserved = new BitSet(256);
- // Static initializer for unreserved
- static {
-     unreserved.or(mark);
-     unreserved.or(alphanum);
- }
- /**
-  * BitSet for reserved: the characters with a reserved purpose in a URI.
-  * <p><blockquote><pre>
-  * reserved = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
-  * "$" | ","
-  * </pre></blockquote><p>
-  */
- protected static final BitSet reserved = new BitSet(256);
- // Static initializer for reserved
- static {
-     final String reservedChars = ";/?:@&=+$,";
-     for (int i = 0; i < reservedChars.length(); i++) {
-         reserved.set(reservedChars.charAt(i));
-     }
- }
- /**
-  * BitSet for uric: every character that may appear in a URI.
-  * <p><blockquote><pre>
-  * uric = reserved | unreserved | escaped
-  * </pre></blockquote><p>
-  */
- protected static final BitSet uric = new BitSet(256);
- // Static initializer for uric
- static {
-     uric.or(escaped);
-     uric.or(unreserved);
-     uric.or(reserved);
- }
- /**
- * BitSet for fragment (alias for uric).
- * This is the same BitSet instance as <code>uric</code>, not a copy.
- * <p><blockquote><pre>
- * fragment = *uric
- * </pre></blockquote><p>
- */
- protected static final BitSet fragment = uric;
- /**
- * BitSet for query (alias for uric).
- * This is the same BitSet instance as <code>uric</code>, not a copy.
- * <p><blockquote><pre>
- * query = *uric
- * </pre></blockquote><p>
- */
- protected static final BitSet query = uric;
- /**
-  * BitSet for pchar: the characters allowed in a path segment.
-  * <p><blockquote><pre>
-  * pchar = unreserved | escaped |
-  * ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
-  * </pre></blockquote><p>
-  */
- protected static final BitSet pchar = new BitSet(256);
- // Static initializer for pchar
- static {
-     pchar.or(unreserved);
-     pchar.or(escaped);
-     final String extras = ":@&=+$,";
-     for (int i = 0; i < extras.length(); i++) {
-         pchar.set(extras.charAt(i));
-     }
- }
- /**
- * BitSet for param (alias for pchar).
- * This is the same BitSet instance as <code>pchar</code>, not a copy.
- * <p><blockquote><pre>
- * param = *pchar
- * </pre></blockquote><p>
- */
- protected static final BitSet param = pchar;
- /**
-  * BitSet for segment: the pchar characters plus the ";" that introduces
-  * a parameter.
-  * <p><blockquote><pre>
-  * segment = *pchar *( ";" param )
-  * </pre></blockquote><p>
-  */
- protected static final BitSet segment = new BitSet(256);
- // Static initializer for segment
- static {
-     segment.set(';');
-     segment.or(pchar);
-     // param is the same BitSet as pchar; kept to mirror the grammar
-     segment.or(param);
- }
- /**
-  * BitSet for path segments: the segment characters plus the "/" separator.
-  * <p><blockquote><pre>
-  * path_segments = segment *( "/" segment )
-  * </pre></blockquote><p>
-  */
- protected static final BitSet path_segments = new BitSet(256);
- // Static initializer for path_segments
- static {
-     path_segments.or(segment);
-     path_segments.set('/');
- }
- /**
-  * BitSet for the URI absolute path. As a character class it contains
-  * the same characters as path_segments, which already include "/".
-  * <p><blockquote><pre>
-  * abs_path = "/" path_segments
-  * </pre></blockquote><p>
-  */
- protected static final BitSet abs_path = new BitSet(256);
- // Static initializer for abs_path
- static {
-     abs_path.or(path_segments);
-     // the leading slash of the production
-     abs_path.set('/');
- }
- /**
-  * URI bitset for encoding typical non-slash characters: every uric
-  * character except "/".
-  * <p><blockquote><pre>
-  * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
-  * "&amp;" | "=" | "+" | "$" | ","
-  * </pre></blockquote><p>
-  */
- protected static final BitSet uric_no_slash = new BitSet(256);
- // Static initializer for uric_no_slash
- static {
-     uric_no_slash.or(unreserved);
-     uric_no_slash.or(escaped);
-     uric_no_slash.set(';');
-     uric_no_slash.set('?');
-     // ':' belongs to the production above; ';' used to be set a second
-     // time here instead, which left ':' out of the set
-     uric_no_slash.set(':');
-     uric_no_slash.set('@');
-     uric_no_slash.set('&');
-     uric_no_slash.set('=');
-     uric_no_slash.set('+');
-     uric_no_slash.set('$');
-     uric_no_slash.set(',');
- }
- /**
-  * URI bitset that combines uric_no_slash and uric.
-  * <p><blockquote><pre>
-  * opaque_part = uric_no_slash *uric
-  * </pre></blockquote><p>
-  * The set is generous: it is a plain union, so it cannot express that
-  * the first character must not be a slash.
-  */
- protected static final BitSet opaque_part = new BitSet(256);
- // Static initializer for opaque_part
- static {
-     opaque_part.or(uric);
-     opaque_part.or(uric_no_slash);
- }
- /**
-  * URI bitset that combines absolute path and opaque part.
-  * <p><blockquote><pre>
-  * path = [ abs_path | opaque_part ]
-  * </pre></blockquote><p>
-  */
- protected static final BitSet path = new BitSet(256);
- // Static initializer for path
- static {
-     path.or(opaque_part);
-     path.or(abs_path);
- }
- /**
- * Port, a logical alias for digit.
- * This is the same BitSet instance as <code>digit</code>, not a copy.
- */
- protected static final BitSet port = digit;
- /**
-  * Bitset that combines digit and dot for IPv4address.
-  * <p><blockquote><pre>
-  * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
-  * </pre></blockquote><p>
-  */
- protected static final BitSet IPv4address = new BitSet(256);
- // Static initializer for IPv4address
- static {
-     IPv4address.set('.');
-     IPv4address.or(digit);
- }
- /**
-  * BitSet for IPv6address (RFC 2373): hex digits, the ":" separator and
-  * the characters of an embedded IPv4 address.
-  * <p><blockquote><pre>
-  * IPv6address = hexpart [ ":" IPv4address ]
-  * </pre></blockquote><p>
-  */
- protected static final BitSet IPv6address = new BitSet(256);
- // Static initializer for IPv6address reference
- static {
-     IPv6address.set(':');
-     IPv6address.or(hex); // hexpart
-     IPv6address.or(IPv4address);
- }
- /**
-  * BitSet for IPv6reference (RFC 2732, 2373): the IPv6address characters
-  * plus the enclosing square brackets.
-  * <p><blockquote><pre>
-  * IPv6reference = "[" IPv6address "]"
-  * </pre></blockquote><p>
-  */
- protected static final BitSet IPv6reference = new BitSet(256);
- // Static initializer for IPv6reference
- static {
-     IPv6reference.or(IPv6address);
-     IPv6reference.set('[');
-     IPv6reference.set(']');
- }
- /**
-  * BitSet for toplabel: alphanumerics and the hyphen. It is a character
-  * class only; the position rules of the production are not expressed.
-  * <p><blockquote><pre>
-  * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
-  * </pre></blockquote><p>
-  */
- protected static final BitSet toplabel = new BitSet(256);
- // Static initializer for toplabel
- static {
-     toplabel.set('-');
-     toplabel.or(alphanum);
- }
- /**
- * BitSet for domainlabel.
- * This is the same BitSet instance as <code>toplabel</code>, not a copy.
- * <p><blockquote><pre>
- * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
- * </pre></blockquote><p>
- */
- protected static final BitSet domainlabel = toplabel;
- /**
-  * BitSet for hostname: the label characters plus the "." separator.
-  * <p><blockquote><pre>
-  * hostname = *( domainlabel "." ) toplabel [ "." ]
-  * </pre></blockquote><p>
-  */
- protected static final BitSet hostname = new BitSet(256);
- // Static initializer for hostname
- static {
-     hostname.set('.');
-     // domainlabel is the same BitSet as toplabel, so one or() covers both
-     hostname.or(toplabel);
- }
- /**
-  * BitSet for host: the characters of a hostname, an IPv4 address or an
-  * IPv6 reference.
-  * <p><blockquote><pre>
-  * host = hostname | IPv4address | IPv6reference
-  * </pre></blockquote><p>
-  */
- protected static final BitSet host = new BitSet(256);
- // Static initializer for host
- static {
-     // IPv6reference already contains the IPv4address characters, so
-     // IPv4address needs no or() of its own
-     host.or(IPv6reference);
-     host.or(hostname);
- }
- /**
-  * BitSet for hostport: the host characters, the ":" separator and the
-  * port digits.
-  * <p><blockquote><pre>
-  * hostport = host [ ":" port ]
-  * </pre></blockquote><p>
-  */
- protected static final BitSet hostport = new BitSet(256);
- // Static initializer for hostport
- static {
-     hostport.set(':');
-     hostport.or(port);
-     hostport.or(host);
- }
- /**
-  * Bitset for userinfo.
-  * <p><blockquote><pre>
-  * userinfo = *( unreserved | escaped |
-  * ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
-  * </pre></blockquote><p>
-  */
- protected static final BitSet userinfo = new BitSet(256);
- // Static initializer for userinfo
- static {
-     userinfo.or(unreserved);
-     userinfo.or(escaped);
-     final String extras = ";:&=+$,";
-     for (int i = 0; i < extras.length(); i++) {
-         userinfo.set(extras.charAt(i));
-     }
- }
- /**
-  * BitSet for within the userinfo component like user and password: the
-  * userinfo characters minus the delimiters ";", ":", "@", "?" and "/".
-  */
- public static final BitSet within_userinfo = new BitSet(256);
- // Static initializer for within_userinfo
- static {
-     // start from userinfo, then take out the characters that are
-     // reserved within the authority
-     within_userinfo.or(userinfo);
-     final String delimiters = ";:@?/";
-     for (int i = 0; i < delimiters.length(); i++) {
-         within_userinfo.clear(delimiters.charAt(i));
-     }
- }
- /**
-  * Bitset for server: the userinfo characters, the "@" separator and the
-  * hostport characters.
-  * <p><blockquote><pre>
-  * server = [ [ userinfo "@" ] hostport ]
-  * </pre></blockquote><p>
-  */
- protected static final BitSet server = new BitSet(256);
- // Static initializer for server
- static {
-     server.set('@');
-     server.or(hostport);
-     server.or(userinfo);
- }
- /**
-  * BitSet for reg_name.
-  * <p><blockquote><pre>
-  * reg_name = 1*( unreserved | escaped | "$" | "," |
-  * ";" | ":" | "@" | "&amp;" | "=" | "+" )
-  * </pre></blockquote><p>
-  */
- protected static final BitSet reg_name = new BitSet(256);
- // Static initializer for reg_name
- static {
-     reg_name.or(unreserved);
-     reg_name.or(escaped);
-     final String extras = "$,;:@&=+";
-     for (int i = 0; i < extras.length(); i++) {
-         reg_name.set(extras.charAt(i));
-     }
- }
- /**
- * Characters that may appear in the authority component.
- * <p><blockquote><pre>
- * authority = server | reg_name
- * </pre></blockquote><p>
- * The set is the union of <code>server</code> and <code>reg_name</code>.
- */
- protected static final BitSet authority = new BitSet(256);
- // Populate authority
- static {
- authority.or(reg_name);
- authority.or(server);
- }
- /**
- * Characters that may appear in a scheme.
- * <p><blockquote><pre>
- * scheme = alpha *( alpha | digit | "+" | "-" | "." )
- * </pre></blockquote><p>
- * The set is a plain union of <code>alpha</code>, <code>digit</code> and
- * the three punctuation characters; it does not encode the position rule
- * (leading alpha) of the grammar.
- */
- protected static final BitSet scheme = new BitSet(256);
- // Populate scheme
- static {
- final String punctuation = "+-.";
- for (int i = 0; i < punctuation.length(); i++) {
- scheme.set(punctuation.charAt(i));
- }
- scheme.or(digit);
- scheme.or(alpha);
- }
- /**
- * Characters that may appear in a rel_segment.
- * <p><blockquote><pre>
- * rel_segment = 1*( unreserved | escaped |
- * ";" | "@" | "&" | "=" | "+" | "$" | "," )
- * </pre></blockquote><p>
- */
- protected static final BitSet rel_segment = new BitSet(256);
- // Populate rel_segment: the listed punctuation plus unreserved and escaped
- static {
- final String punctuation = ";@&=+$,";
- for (int i = 0; i < punctuation.length(); i++) {
- rel_segment.set(punctuation.charAt(i));
- }
- rel_segment.or(unreserved);
- rel_segment.or(escaped);
- }
- /**
- * Characters that may appear in a rel_path.
- * <p><blockquote><pre>
- * rel_path = rel_segment [ abs_path ]
- * </pre></blockquote><p>
- * The set is the union of <code>rel_segment</code> and
- * <code>abs_path</code>.
- */
- protected static final BitSet rel_path = new BitSet(256);
- // Populate rel_path
- static {
- rel_path.or(abs_path);
- rel_path.or(rel_segment);
- }
- /**
- * Characters that may appear in a net_path.
- * <p><blockquote><pre>
- * net_path = "//" authority [ abs_path ]
- * </pre></blockquote><p>
- * The set is the union of <code>authority</code>, <code>abs_path</code>
- * and '/'.
- */
- protected static final BitSet net_path = new BitSet(256);
- // Populate net_path
- static {
- net_path.or(abs_path);
- net_path.or(authority);
- net_path.set('/');
- }
- /**
- * Characters that may appear in a hier_part.
- * <p><blockquote><pre>
- * hier_part = ( net_path | abs_path ) [ "?" query ]
- * </pre></blockquote><p>
- * The set is the union of <code>net_path</code>, <code>abs_path</code>
- * and <code>query</code>.
- */
- protected static final BitSet hier_part = new BitSet(256);
- // Populate hier_part
- static {
- hier_part.or(query);
- hier_part.or(abs_path);
- hier_part.or(net_path);
- // '?' is not set explicitly: per the original author's note it is
- // already included by the sets merged above.
- }
- /**
- * Characters that may appear in a relativeURI.
- * <p><blockquote><pre>
- * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
- * </pre></blockquote><p>
- * The set is the union of <code>net_path</code>, <code>abs_path</code>,
- * <code>rel_path</code> and <code>query</code>.
- */
- protected static final BitSet relativeURI = new BitSet(256);
- // Populate relativeURI
- static {
- relativeURI.or(query);
- relativeURI.or(rel_path);
- relativeURI.or(abs_path);
- relativeURI.or(net_path);
- // '?' is not set explicitly: per the original author's note it is
- // already included by the sets merged above.
- }
- /**
- * Characters that may appear in an absoluteURI.
- * <p><blockquote><pre>
- * absoluteURI = scheme ":" ( hier_part | opaque_part )
- * </pre></blockquote><p>
- * The set is the union of <code>scheme</code>, <code>hier_part</code>,
- * <code>opaque_part</code> and ':'.
- */
- protected static final BitSet absoluteURI = new BitSet(256);
- // Populate absoluteURI
- static {
- absoluteURI.set(':');
- absoluteURI.or(opaque_part);
- absoluteURI.or(hier_part);
- absoluteURI.or(scheme);
- }
- /**
- * Characters that may appear in a URI-reference.
- * <p><blockquote><pre>
- * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
- * </pre></blockquote><p>
- * The set is the union of <code>absoluteURI</code>,
- * <code>relativeURI</code>, <code>fragment</code> and '#'.
- */
- protected static final BitSet URI_reference = new BitSet(256);
- // Populate URI_reference
- static {
- URI_reference.set('#');
- URI_reference.or(fragment);
- URI_reference.or(relativeURI);
- URI_reference.or(absoluteURI);
- }
- // ---------------------------- Characters disallowed within the URI syntax
- // The excluded US-ASCII characters are: control, space, delims and unwise
- /**
- * US-ASCII control characters: the codes 0x00 through 0x1F, plus 0x7F.
- */
- public static final BitSet control = new BitSet(256);
- // Populate control
- static {
- control.set(0x7F);
- int code = 0x20;
- while (code > 0) {
- code--;
- control.set(code);
- }
- }
- /**
- * The space character (0x20) only.
- */
- public static final BitSet space = new BitSet(256);
- // Populate space
- static {
- space.set(' ');
- }
- /**
- * The delimiter characters '&lt;', '&gt;', '#', '%' and '"'.
- */
- public static final BitSet delims = new BitSet(256);
- // Populate delims
- static {
- final String delimiterChars = "<>#%\"";
- for (int i = 0; i < delimiterChars.length(); i++) {
- delims.set(delimiterChars.charAt(i));
- }
- }
- /**
- * The "unwise" characters '{', '}', '|', '\', '^', '[', ']' and '`'.
- */
- public static final BitSet unwise = new BitSet(256);
- // Populate unwise
- static {
- final String unwiseChars = "{}|\\^[]`";
- for (int i = 0; i < unwiseChars.length(); i++) {
- unwise.set(unwiseChars.charAt(i));
- }
- }
- /**
- * Characters disallowed in a rel_path before escaping: every character
- * of <code>uric</code> that is not a member of <code>rel_path</code>.
- */
- public static final BitSet disallowed_rel_path = new BitSet(256);
- // Populate disallowed_rel_path
- static {
- // start from uric ...
- disallowed_rel_path.or(uric);
- // ... and drop everything rel_path accepts
- disallowed_rel_path.andNot(rel_path);
- }
- /**
- * Characters disallowed in an opaque_part before escaping: every
- * character of <code>uric</code> that is not a member of
- * <code>opaque_part</code>.
- */
- public static final BitSet disallowed_opaque_part = new BitSet(256);
- // Populate disallowed_opaque_part
- static {
- // start from uric ...
- disallowed_opaque_part.or(uric);
- // ... and drop everything opaque_part accepts
- disallowed_opaque_part.andNot(opaque_part);
- }
- // ----------------------- Characters allowed within and for each component
- /**
- * Characters allowed for the authority component:
- * <code>authority</code> with '%' removed.
- */
- public static final BitSet allowed_authority = new BitSet(256);
- // Populate allowed_authority
- static {
- allowed_authority.or(authority);
- // the percent sign is not allowed as-is
- allowed_authority.clear('%');
- }
- /**
- * Characters allowed for the opaque_part:
- * <code>opaque_part</code> with '%' removed.
- */
- public static final BitSet allowed_opaque_part = new BitSet(256);
- // Populate allowed_opaque_part
- static {
- allowed_opaque_part.or(opaque_part);
- // the percent sign is not allowed as-is
- allowed_opaque_part.clear('%');
- }
- /**
- * Characters allowed for the reg_name:
- * <code>reg_name</code> with '%' removed.
- */
- public static final BitSet allowed_reg_name = new BitSet(256);
- // Populate allowed_reg_name
- static {
- allowed_reg_name.or(reg_name);
- // the percent sign is cleared directly rather than via andNot(percent)
- allowed_reg_name.clear('%');
- }
- /**
- * Characters allowed for the userinfo component:
- * <code>userinfo</code> with '%' removed.
- */
- public static final BitSet allowed_userinfo = new BitSet(256);
- // Populate allowed_userinfo
- static {
- allowed_userinfo.or(userinfo);
- // the percent sign is cleared directly rather than via andNot(percent)
- allowed_userinfo.clear('%');
- }
- /**
- * Characters allowed within the userinfo component:
- * <code>within_userinfo</code> with '%' removed.
- */
- public static final BitSet allowed_within_userinfo = new BitSet(256);
- // Populate allowed_within_userinfo
- static {
- allowed_within_userinfo.or(within_userinfo);
- // the percent sign is not allowed as-is
- allowed_within_userinfo.clear('%');
- }
- /**
- * Characters allowed for the IPv6reference component:
- * <code>IPv6reference</code> with the enclosing brackets '[' and ']'
- * removed.
- */
- public static final BitSet allowed_IPv6reference = new BitSet(256);
- // Populate allowed_IPv6reference
- static {
- allowed_IPv6reference.or(IPv6reference);
- // only the two brackets are cleared, not the whole unwise set
- allowed_IPv6reference.clear(']');
- allowed_IPv6reference.clear('[');
- }
- /**
- * Characters allowed for the host component: the union of
- * <code>hostname</code> and <code>allowed_IPv6reference</code>. The
- * latter has '[' and ']' cleared.
- */
- public static final BitSet allowed_host = new BitSet(256);
- // Populate allowed_host
- static {
- allowed_host.or(allowed_IPv6reference);
- allowed_host.or(hostname);
- }
- /**
- * Characters allowed within the authority component: the union of
- * <code>server</code> and <code>reg_name</code> with ';', ':', '@', '?'
- * and '/' cleared.
- */
- public static final BitSet allowed_within_authority = new BitSet(256);
- // Populate allowed_within_authority
- static {
- allowed_within_authority.or(reg_name);
- allowed_within_authority.or(server);
- final String excluded = ";:@?/";
- for (int i = 0; i < excluded.length(); i++) {
- allowed_within_authority.clear(excluded.charAt(i));
- }
- }
- /**
- * Characters allowed for the abs_path: <code>abs_path</code> minus the
- * members of <code>percent</code>.
- */
- public static final BitSet allowed_abs_path = new BitSet(256);
- // Populate allowed_abs_path
- static {
- // '/' is not set explicitly: per the original author's note it is
- // already included in abs_path.
- allowed_abs_path.or(abs_path);
- allowed_abs_path.andNot(percent);
- }
- /**
- * Characters allowed for the rel_path:
- * <code>rel_path</code> with '%' removed.
- */
- public static final BitSet allowed_rel_path = new BitSet(256);
- // Populate allowed_rel_path
- static {
- allowed_rel_path.or(rel_path);
- // the percent sign is not allowed as-is
- allowed_rel_path.clear('%');
- }
- /**
- * Characters allowed within the path: <code>abs_path</code> with '/',
- * ';', '=' and '?' cleared.
- */
- public static final BitSet allowed_within_path = new BitSet(256);
- // Populate allowed_within_path
- static {
- allowed_within_path.or(abs_path);
- final String excluded = "/;=?";
- for (int i = 0; i < excluded.length(); i++) {
- allowed_within_path.clear(excluded.charAt(i));
- }
- }
- /**
- * Characters allowed for the query component:
- * <code>uric</code> with '%' removed.
- */
- public static final BitSet allowed_query = new BitSet(256);
- // Populate allowed_query
- static {
- allowed_query.or(uric);
- // the percent sign is not allowed as-is
- allowed_query.clear('%');
- }
- /**
- * Characters allowed within the query component:
- * <code>allowed_query</code> minus the members of <code>reserved</code>.
- */
- public static final BitSet allowed_within_query = new BitSet(256);
- // Populate allowed_within_query
- static {
- allowed_within_query.or(allowed_query);
- // drop every reserved character
- allowed_within_query.andNot(reserved);
- }
- /**
- * Characters allowed for the fragment component:
- * <code>uric</code> with '%' removed.
- */
- public static final BitSet allowed_fragment = new BitSet(256);
- // Populate allowed_fragment
- static {
- allowed_fragment.or(uric);
- // the percent sign is not allowed as-is
- allowed_fragment.clear('%');
- }
- // ------------------------------------------- Flags for this URI-reference
- // TODO: Figure out what all these variables are for and provide javadoc. Each group of flags below is preceded by the grammar rule(s) it is named after.
- // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
- // absoluteURI = scheme ":" ( hier_part | opaque_part )
- protected boolean _is_hier_part; // NOTE(review): presumably true when this URI has a hier_part -- confirm against the parsing code
- protected boolean _is_opaque_part; // NOTE(review): presumably true when this URI has an opaque_part -- confirm
- // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
- // hier_part = ( net_path | abs_path ) [ "?" query ]
- protected boolean _is_net_path; // NOTE(review): presumably true when the path is a net_path -- confirm
- protected boolean _is_abs_path; // NOTE(review): presumably true when the path is an abs_path -- confirm
- protected boolean _is_rel_path; // NOTE(review): presumably true when the path is a rel_path -- confirm
- // net_path = "//" authority [ abs_path ]