1. /*
  2. * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/URI.java,v 1.47 2004/05/13 04:03:25 mbecke Exp $
  3. * $Revision: 1.47 $
  4. * $Date: 2004/05/13 04:03:25 $
  5. *
  6. * ====================================================================
  7. *
  8. * Copyright 2002-2004 The Apache Software Foundation
  9. *
  10. * Licensed under the Apache License, Version 2.0 (the "License");
  11. * you may not use this file except in compliance with the License.
  12. * You may obtain a copy of the License at
  13. *
  14. * http://www.apache.org/licenses/LICENSE-2.0
  15. *
  16. * Unless required by applicable law or agreed to in writing, software
  17. * distributed under the License is distributed on an "AS IS" BASIS,
  18. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19. * See the License for the specific language governing permissions and
  20. * limitations under the License.
  21. * ====================================================================
  22. *
  23. * This software consists of voluntary contributions made by many
  24. * individuals on behalf of the Apache Software Foundation. For more
  25. * information on the Apache Software Foundation, please see
  26. * <http://www.apache.org/>.
  27. *
  28. */
  29. package org.apache.commons.httpclient;
  30. import java.io.IOException;
  31. import java.io.ObjectInputStream;
  32. import java.io.ObjectOutputStream;
  33. import java.io.Serializable;
  34. import java.util.Locale;
  35. import java.util.BitSet;
  36. import java.util.Hashtable;
  37. import org.apache.commons.codec.DecoderException;
  38. import org.apache.commons.codec.net.URLCodec;
  39. import org.apache.commons.httpclient.util.EncodingUtil;
  40. /**
  41. * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
   * This class has the purpose of supporting the parsing of a URI reference to
  43. * extend any specific protocols, the character encoding of the protocol to
  44. * be transported and the charset of the document.
  45. * <p>
  46. * A URI is always in an "escaped" form, since escaping or unescaping a
  47. * completed URI might change its semantics.
  48. * <p>
  49. * Implementers should be careful not to escape or unescape the same string
  50. * more than once, since unescaping an already unescaped string might lead to
  51. * misinterpreting a percent data character as another escaped character,
  52. * or vice versa in the case of escaping an already escaped string.
  53. * <p>
   * In order to avoid these problems, data types are used as follows:
  55. * <p><blockquote><pre>
  56. * URI character sequence: char
  57. * octet sequence: byte
  58. * original character sequence: String
  59. * </pre></blockquote><p>
  60. *
  61. * So, a URI is a sequence of characters as an array of a char type, which
  62. * is not always represented as a sequence of octets as an array of byte.
  63. * <p>
  64. *
  65. * URI Syntactic Components
  66. * <p><blockquote><pre>
  67. * - In general, written as follows:
   * Absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;
   * Generic URI = &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;
  70. *
  71. * - Syntax
  72. * absoluteURI = scheme ":" ( hier_part | opaque_part )
  73. * hier_part = ( net_path | abs_path ) [ "?" query ]
  74. * net_path = "//" authority [ abs_path ]
  75. * abs_path = "/" path_segments
  76. * </pre></blockquote><p>
  77. *
  78. * The following examples illustrate URI that are in common use.
  79. * <pre>
  80. * ftp://ftp.is.co.za/rfc/rfc1808.txt
  81. * -- ftp scheme for File Transfer Protocol services
  82. * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
  83. * -- gopher scheme for Gopher and Gopher+ Protocol services
  84. * http://www.math.uio.no/faq/compression-faq/part1.html
  85. * -- http scheme for Hypertext Transfer Protocol services
  86. * mailto:mduerst@ifi.unizh.ch
  87. * -- mailto scheme for electronic mail addresses
  88. * news:comp.infosystems.www.servers.unix
  89. * -- news scheme for USENET news groups and articles
  90. * telnet://melvyl.ucop.edu/
  91. * -- telnet scheme for interactive services via the TELNET Protocol
  92. * </pre>
   * Please note that there are many modifications from URL (RFC 1738) and
   * relative URL (RFC 1808).
  95. * <p>
  96. * <b>The expressions for a URI</b>
  97. * <p><pre>
  98. * For escaped URI forms
  99. * - URI(char[]) // constructor
  100. * - char[] getRawXxx() // method
  101. * - String getEscapedXxx() // method
  102. * - String toString() // method
  103. * <p>
  104. * For unescaped URI forms
  105. * - URI(String) // constructor
  106. * - String getXXX() // method
  107. * </pre><p>
  108. *
  109. * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
  110. * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
  111. * @version $Revision: 1.47 $ $Date: 2002/03/14 15:14:01
  112. */
  113. public class URI implements Cloneable, Comparable, Serializable {
  114. // ----------------------------------------------------------- Constructors
  /**
   * Create an empty instance for internal use.
   * <p>
   * The body is empty, so every field keeps the default value given in its
   * declaration until other code assigns it.
   */
  protected URI() {
  }
  /**
   * Construct a URI from a string with the given charset. The input string can
   * be either in escaped or unescaped form.
   * <p>
   * The charset is stored in {@link #protocolCharset} and the string is then
   * handed to <code>parseUriReference</code> together with the
   * <code>escaped</code> flag.
   *
   * @param s URI character sequence
   * @param escaped <tt>true</tt> if URI character sequence is in escaped form.
   *                <tt>false</tt> otherwise.
   * @param charset the charset string to do escape encoding, if required
   *
   * @throws URIException If the URI cannot be created.
   * @throws NullPointerException if input string is <code>null</code>
   *
   * @see #getProtocolCharset
   *
   * @since 3.0
   */
  public URI(String s, boolean escaped, String charset)
      throws URIException, NullPointerException {
      // Stored before parsing; presumably consulted while the reference is
      // encoded -- confirm against parseUriReference.
      protocolCharset = charset;
      parseUriReference(s, escaped);
  }
  /**
   * Construct a URI from a string. The input string can be either in escaped
   * or unescaped form.
   * <p>
   * No charset is assigned here, so {@link #protocolCharset} keeps its
   * declared default of <code>null</code>.
   *
   * @param s URI character sequence
   * @param escaped <tt>true</tt> if URI character sequence is in escaped form.
   *                <tt>false</tt> otherwise.
   *
   * @throws URIException If the URI cannot be created.
   * @throws NullPointerException if input string is <code>null</code>
   *
   * @see #getProtocolCharset
   *
   * @since 3.0
   */
  public URI(String s, boolean escaped)
      throws URIException, NullPointerException {
      parseUriReference(s, escaped);
  }
  /**
   * Construct a URI as an escaped form of a character array with the given
   * charset.
   * <p>
   * The array is copied into a new <code>String</code> and parsed with the
   * escaped flag set to <code>true</code>.
   *
   * @param escaped the URI character sequence
   * @param charset the charset string to do escape encoding
   * @throws URIException If the URI cannot be created.
   * @throws NullPointerException if <code>escaped</code> is <code>null</code>
   * @see #getProtocolCharset
   *
   * @deprecated Use {@link #URI(String, boolean, String)}
   */
  public URI(char[] escaped, String charset)
      throws URIException, NullPointerException {
      protocolCharset = charset;
      // new String(char[]) is what raises the documented NullPointerException
      parseUriReference(new String(escaped), true);
  }
  /**
   * Construct a URI as an escaped form of a character array.
   * An URI can be placed within double-quotes or angle brackets like
   * "http://test.com/" and &lt;http://test.com/&gt;
   * <p>
   * The array is copied into a new <code>String</code> and parsed with the
   * escaped flag set to <code>true</code>; no charset is assigned.
   *
   * @param escaped the URI character sequence
   * @throws URIException If the URI cannot be created.
   * @throws NullPointerException if <code>escaped</code> is <code>null</code>
   * @see #getDefaultProtocolCharset
   *
   * @deprecated Use {@link #URI(String, boolean)}
   */
  public URI(char[] escaped)
      throws URIException, NullPointerException {
      // new String(char[]) is what raises the documented NullPointerException
      parseUriReference(new String(escaped), true);
  }
  /**
   * Construct a URI from the given string with the given charset.
   * <p>
   * The string is parsed with the escaped flag set to <code>false</code>,
   * i.e. it is treated as an unescaped (original) character sequence.
   *
   * @param original the string to be represented to URI character sequence
   * It is one of absoluteURI and relativeURI.
   * @param charset the charset string to do escape encoding
   * @throws URIException If the URI cannot be created.
   * @see #getProtocolCharset
   *
   * @deprecated Use {@link #URI(String, boolean, String)}
   */
  public URI(String original, String charset) throws URIException {
      protocolCharset = charset;
      parseUriReference(original, false);
  }
  /**
   * Construct a URI from the given string.
   * <p><blockquote><pre>
   *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
   * </pre></blockquote><p>
   * An URI can be placed within double-quotes or angle brackets like
   * "http://test.com/" and &lt;http://test.com/&gt;
   * <p>
   * The string is parsed with the escaped flag set to <code>false</code>,
   * i.e. it is treated as an unescaped (original) character sequence.
   *
   * @param original the string to be represented to URI character sequence
   * It is one of absoluteURI and relativeURI.
   * @throws URIException If the URI cannot be created.
   * @see #getDefaultProtocolCharset
   *
   * @deprecated Use {@link #URI(String, boolean)}
   */
  public URI(String original) throws URIException {
      parseUriReference(original, false);
  }
  /**
   * Construct a general (opaque) URI from the given components.
   * <p><blockquote><pre>
   *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
   *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
   *   opaque_part   = uric_no_slash *uric
   * </pre></blockquote><p>
   * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
   * &lt;fragment&gt;.
   * <p>
   * The scheme is lower-cased and validated, the scheme specific part is
   * encoded as an opaque part using the protocol charset, and the fragment
   * is stored as given.
   *
   * @param scheme the scheme string; must not be <code>null</code>
   * @param schemeSpecificPart scheme_specific_part
   * @param fragment the fragment string, or <code>null</code> when the URI
   * has no fragment
   * @throws URIException If the scheme is missing or invalid, or the URI
   * cannot otherwise be created.
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String schemeSpecificPart, String fragment)
      throws URIException {
      // validate and construct the URI character sequence
      if (scheme == null) {
          throw new URIException(URIException.PARSING, "scheme required");
      }
      // Lower-case independently of the default locale: under e.g. a Turkish
      // locale 'I' would become a dotless i and fail scheme validation.
      char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray();
      if (!validate(s, URI.scheme)) {
          throw new URIException(URIException.PARSING, "incorrect scheme");
      }
      _scheme = s; // is_absoluteURI
      _opaque = encode(schemeSpecificPart, allowed_opaque_part,
          getProtocolCharset());
      // Set flag
      _is_opaque_part = true;
      // The fragment is optional; a null fragment must not raise an NPE.
      _fragment = (fragment == null) ? null : fragment.toCharArray();
      setURI();
  }
  259. /**
  260. * Construct a general URI from the given components.
  261. * <p><blockquote><pre>
  262. * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  263. * absoluteURI = scheme ":" ( hier_part | opaque_part )
  264. * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
  265. * hier_part = ( net_path | abs_path ) [ "?" query ]
  266. * </pre></blockquote><p>
  267. * It's for absolute URI = <scheme>:<path>?<query>#<
  268. * fragment> and relative URI = <path>?<query>#<fragment
  269. * >.
  270. *
  271. * @param scheme the scheme string
  272. * @param authority the authority string
  273. * @param path the path string
  274. * @param query the query string
  275. * @param fragment the fragment string
  276. * @throws URIException If the new URI cannot be created.
  277. * @see #getDefaultProtocolCharset
  278. */
  279. public URI(String scheme, String authority, String path, String query,
  280. String fragment) throws URIException {
  281. // validate and contruct the URI character sequence
  282. StringBuffer buff = new StringBuffer();
  283. if (scheme != null) {
  284. buff.append(scheme);
  285. buff.append(':');
  286. }
  287. if (authority != null) {
  288. buff.append("//");
  289. buff.append(authority);
  290. }
  291. if (path != null) { // accept empty path
  292. if ((scheme != null || authority != null)
  293. && !path.startsWith("/")) {
  294. throw new URIException(URIException.PARSING,
  295. "abs_path requested");
  296. }
  297. buff.append(path);
  298. }
  299. if (query != null) {
  300. buff.append('?');
  301. buff.append(query);
  302. }
  303. if (fragment != null) {
  304. buff.append('#');
  305. buff.append(fragment);
  306. }
  307. parseUriReference(buff.toString(), false);
  308. }
  /**
   * Construct a general URI from the given components.
   * <p>
   * Delegates to the seven-argument constructor with <code>null</code> for
   * path, query and fragment.
   *
   * @param scheme the scheme string
   * @param userinfo the userinfo string
   * @param host the host string
   * @param port the port number
   * @throws URIException If the new URI cannot be created.
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String userinfo, String host, int port)
      throws URIException {
      this(scheme, userinfo, host, port, null, null, null);
  }
  /**
   * Construct a general URI from the given components.
   * <p>
   * Delegates to the seven-argument constructor with <code>null</code> for
   * query and fragment.
   *
   * @param scheme the scheme string
   * @param userinfo the userinfo string
   * @param host the host string
   * @param port the port number
   * @param path the path string
   * @throws URIException If the new URI cannot be created.
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String userinfo, String host, int port,
      String path) throws URIException {
      this(scheme, userinfo, host, port, path, null, null);
  }
  /**
   * Construct a general URI from the given components.
   * <p>
   * Delegates to the seven-argument constructor with <code>null</code> for
   * the fragment.
   *
   * @param scheme the scheme string
   * @param userinfo the userinfo string
   * @param host the host string
   * @param port the port number
   * @param path the path string
   * @param query the query string
   * @throws URIException If the new URI cannot be created.
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String userinfo, String host, int port,
      String path, String query) throws URIException {
      this(scheme, userinfo, host, port, path, query, null);
  }
  /**
   * Construct a general URI from the given components.
   * <p>
   * The authority handed to the five-string constructor is built here:
   * it is <code>null</code> when <code>host</code> is <code>null</code>
   * (userinfo and port are then ignored); otherwise it is
   * <code>[userinfo "@"] host [":" port]</code>, where the userinfo part is
   * omitted for a <code>null</code> userinfo and the port part is omitted
   * when <code>port</code> is -1.
   *
   * @param scheme the scheme string
   * @param userinfo the userinfo string
   * @param host the host string
   * @param port the port number, or -1 for none
   * @param path the path string
   * @param query the query string
   * @param fragment the fragment string
   * @throws URIException If the new URI cannot be created.
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String userinfo, String host, int port,
      String path, String query, String fragment) throws URIException {
      this(scheme, (host == null) ? null
          : ((userinfo != null) ? userinfo + '@' : "") + host
          + ((port != -1) ? ":" + port : ""), path, query, fragment);
  }
  /**
   * Construct a general URI from the given components.
   * <p>
   * Delegates to the five-string constructor, passing <code>host</code> in
   * the authority position and <code>null</code> for the query.
   *
   * @param scheme the scheme string
   * @param host the host string
   * @param path the path string
   * @param fragment the fragment string
   * @throws URIException If the new URI cannot be created.
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String host, String path, String fragment)
      throws URIException {
      this(scheme, host, path, null, fragment);
  }
  /**
   * Construct a general URI with the given relative URI string.
   * <p>
   * The string is first turned into a URI with the single-string
   * constructor and the result is resolved against <code>base</code>.
   *
   * @param base the base URI
   * @param relative the relative URI string
   * @throws URIException If the new URI cannot be created.
   *
   * @deprecated Use {@link #URI(URI, String, boolean)}
   */
  public URI(URI base, String relative) throws URIException {
      this(base, new URI(relative));
  }
  /**
   * Construct a general URI with the given relative URI string.
   * <p>
   * The string is first turned into a URI with the
   * <code>(String, boolean)</code> constructor and the result is resolved
   * against <code>base</code>.
   *
   * @param base the base URI
   * @param relative the relative URI string
   * @param escaped <tt>true</tt> if URI character sequence is in escaped form.
   *                <tt>false</tt> otherwise.
   *
   * @throws URIException If the new URI cannot be created.
   *
   * @since 3.0
   */
  public URI(URI base, String relative, boolean escaped) throws URIException {
      this(base, new URI(relative, escaped));
  }
  414. /**
  415. * Construct a general URI with the given relative URI.
  416. * <p><blockquote><pre>
  417. * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  418. * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
  419. * </pre></blockquote><p>
  420. * Resolving Relative References to Absolute Form.
  421. *
  422. * <strong>Examples of Resolving Relative URI References</strong>
  423. *
  424. * Within an object with a well-defined base URI of
  425. * <p><blockquote><pre>
  426. * http://a/b/c/d;p?q
  427. * </pre></blockquote><p>
  428. * the relative URI would be resolved as follows:
  429. *
  430. * Normal Examples
  431. *
  432. * <p><blockquote><pre>
  433. * g:h = g:h
  434. * g = http://a/b/c/g
  435. * ./g = http://a/b/c/g
  436. * g/ = http://a/b/c/g/
  437. * /g = http://a/g
  438. * //g = http://g
  439. * ?y = http://a/b/c/?y
  440. * g?y = http://a/b/c/g?y
  441. * #s = (current document)#s
  442. * g#s = http://a/b/c/g#s
  443. * g?y#s = http://a/b/c/g?y#s
  444. * ;x = http://a/b/c/;x
  445. * g;x = http://a/b/c/g;x
  446. * g;x?y#s = http://a/b/c/g;x?y#s
  447. * . = http://a/b/c/
  448. * ./ = http://a/b/c/
  449. * .. = http://a/b/
  450. * ../ = http://a/b/
  451. * ../g = http://a/b/g
  452. * ../.. = http://a/
  453. * ../../ = http://a/
  454. * ../../g = http://a/g
  455. * </pre></blockquote><p>
  456. *
  457. * Some URI schemes do not allow a hierarchical syntax matching the
  458. * <hier_part> syntax, and thus cannot use relative references.
  459. *
  460. * @param base the base URI
  461. * @param relative the relative URI
  462. * @throws URIException If the new URI cannot be created.
  463. */
  464. public URI(URI base, URI relative) throws URIException {
  465. if (base._scheme == null) {
  466. throw new URIException(URIException.PARSING, "base URI required");
  467. }
  468. if (base._scheme != null) {
  469. this._scheme = base._scheme;
  470. this._authority = base._authority;
  471. }
  472. if (base._is_opaque_part || relative._is_opaque_part) {
  473. this._scheme = base._scheme;
  474. this._is_opaque_part = base._is_opaque_part
  475. || relative._is_opaque_part;
  476. this._opaque = relative._opaque;
  477. this._fragment = relative._fragment;
  478. this.setURI();
  479. return;
  480. }
  481. if (relative._scheme != null) {
  482. this._scheme = relative._scheme;
  483. this._is_net_path = relative._is_net_path;
  484. this._authority = relative._authority;
  485. if (relative._is_server) {
  486. this._is_server = relative._is_server;
  487. this._userinfo = relative._userinfo;
  488. this._host = relative._host;
  489. this._port = relative._port;
  490. } else if (relative._is_reg_name) {
  491. this._is_reg_name = relative._is_reg_name;
  492. }
  493. this._is_abs_path = relative._is_abs_path;
  494. this._is_rel_path = relative._is_rel_path;
  495. this._path = relative._path;
  496. } else if (base._authority != null && relative._scheme == null) {
  497. this._is_net_path = base._is_net_path;
  498. this._authority = base._authority;
  499. if (base._is_server) {
  500. this._is_server = base._is_server;
  501. this._userinfo = base._userinfo;
  502. this._host = base._host;
  503. this._port = base._port;
  504. } else if (base._is_reg_name) {
  505. this._is_reg_name = base._is_reg_name;
  506. }
  507. }
  508. if (relative._authority != null) {
  509. this._is_net_path = relative._is_net_path;
  510. this._authority = relative._authority;
  511. if (relative._is_server) {
  512. this._is_server = relative._is_server;
  513. this._userinfo = relative._userinfo;
  514. this._host = relative._host;
  515. this._port = relative._port;
  516. } else if (relative._is_reg_name) {
  517. this._is_reg_name = relative._is_reg_name;
  518. }
  519. this._is_abs_path = relative._is_abs_path;
  520. this._is_rel_path = relative._is_rel_path;
  521. this._path = relative._path;
  522. }
  523. // resolve the path and query if necessary
  524. if (relative._scheme == null && relative._authority == null) {
  525. if ((relative._path == null || relative._path.length == 0)
  526. && relative._query == null) {
  527. // handle a reference to the current document, see RFC 2396
  528. // section 5.2 step 2
  529. this._path = base._path;
  530. this._query = base._query;
  531. } else {
  532. this._path = resolvePath(base._path, relative._path);
  533. }
  534. }
  535. // base._query removed
  536. if (relative._query != null) {
  537. this._query = relative._query;
  538. }
  539. // base._fragment removed
  540. if (relative._fragment != null) {
  541. this._fragment = relative._fragment;
  542. }
  543. this.setURI();
  544. // reparse the newly built URI, this will ensure that all flags are set correctly.
  545. // TODO there must be a better way to do this
  546. parseUriReference(new String(_uri), true);
  547. }
  548. // --------------------------------------------------- Instance Variables
  /** Version ID for serialization */
  static final long serialVersionUID = 604752400577948726L;
  /**
   * Cache the hash code for this URI.
   * Declared with an initial value of 0.
   */
  protected int hash = 0;
  /**
   * This Uniform Resource Identifier (URI).
   * The URI is always in an "escaped" form, since escaping or unescaping
   * a completed URI might change its semantics.
   */
  protected char[] _uri = null;
  /**
   * The charset of the protocol used by this URI instance.
   * It stays <code>null</code> unless a constructor that takes a charset
   * assigns it.
   */
  protected String protocolCharset = null;
  /**
   * The default charset of the protocol.  RFC 2277, 2396
   */
  protected static String defaultProtocolCharset = "UTF-8";
  /**
   * The default charset of the document.  RFC 2277, 2396
   * Set by the static initializer that follows: the charset looked up for
   * the default locale is used when that lookup yields a non-null value,
   * otherwise the platform's charset is used.
   */
  protected static String defaultDocumentCharset = null;
  /**
   * The document charset looked up from the default locale by the static
   * initializer that follows.
   */
  protected static String defaultDocumentCharsetByLocale = null;
  /**
   * The value of the <code>file.encoding</code> system property, read by the
   * static initializer that follows; it stays <code>null</code> when reading
   * the property raises a <code>SecurityException</code>.
   */
  protected static String defaultDocumentCharsetByPlatform = null;
  576. // Static initializer for defaultDocumentCharset
  577. static {
  578. Locale locale = Locale.getDefault();
  579. // in order to support backward compatiblity
  580. if (locale != null) {
  581. defaultDocumentCharsetByLocale =
  582. LocaleToCharsetMap.getCharset(locale);
  583. // set the default document charset
  584. defaultDocumentCharset = defaultDocumentCharsetByLocale;
  585. }
  586. // in order to support platform encoding
  587. try {
  588. defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
  589. } catch (SecurityException ignore) {
  590. }
  591. if (defaultDocumentCharset == null) {
  592. // set the default document charset
  593. defaultDocumentCharset = defaultDocumentCharsetByPlatform;
  594. }
  595. }
  /**
   * The scheme.
   */
  protected char[] _scheme = null;
  /**
   * The opaque part, used when the URI is not hierarchical.
   */
  protected char[] _opaque = null;
  /**
   * The authority.
   */
  protected char[] _authority = null;
  /**
   * The userinfo.
   */
  protected char[] _userinfo = null;
  /**
   * The host.
   */
  protected char[] _host = null;
  /**
   * The port. Declared with an initial value of -1, the same value the
   * component constructors treat as "no port".
   */
  protected int _port = -1;
  /**
   * The path.
   */
  protected char[] _path = null;
  /**
   * The query.
   */
  protected char[] _query = null;
  /**
   * The fragment.
   */
  protected char[] _fragment = null;
  /**
   * The root path: a one-element array holding '/'.
   */
  protected static char[] rootPath = { '/' };
  636. // ---------------------- Generous characters for each component validation
  /**
   * The percent "%" character always has the reserved purpose of being the
   * escape indicator, it must be escaped as "%25" in order to be used as
   * data within a URI.
   * <p>
   * This set contains exactly that one character.
   */
  protected static final BitSet percent = new BitSet(256);
  // Static initializer for percent
  static {
      percent.set('%');
  }
  /**
   * BitSet holding the ten decimal digits.
   * <p><blockquote><pre>
   * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
   *            "8" | "9"
   * </pre></blockquote><p>
   */
  protected static final BitSet digit = new BitSet(256);
  // Static initializer for digit: mark '0' through '9'
  static {
      for (char c = '0'; c <= '9'; c++) {
          digit.set(c);
      }
  }
  /**
   * BitSet holding the ASCII letters of both cases.
   * <p><blockquote><pre>
   *  alpha         = lowalpha | upalpha
   * </pre></blockquote><p>
   */
  protected static final BitSet alpha = new BitSet(256);
  // Static initializer for alpha: walk both alphabets in step
  static {
      for (char lower = 'a', upper = 'A'; lower <= 'z'; lower++, upper++) {
          alpha.set(lower);
          alpha.set(upper);
      }
  }
  /**
   * BitSet for alphanum (join of alpha &amp; digit).
   * <p><blockquote><pre>
   *  alphanum      = alpha | digit
   * </pre></blockquote><p>
   */
  protected static final BitSet alphanum = new BitSet(256);
  // Static initializer for alphanum: union of the two sets declared above
  static {
      alphanum.or(alpha);
      alphanum.or(digit);
  }
  /**
   * BitSet holding the hexadecimal digits in either case.
   * <p><blockquote><pre>
   * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
   *                         "a" | "b" | "c" | "d" | "e" | "f"
   * </pre></blockquote><p>
   */
  protected static final BitSet hex = new BitSet(256);
  // Static initializer for hex: the letters a-f / A-F plus every digit
  static {
      for (char lower = 'a', upper = 'A'; lower <= 'f'; lower++, upper++) {
          hex.set(lower);
          hex.set(upper);
      }
      hex.or(digit);
  }
  /**
   * BitSet for escaped.
   * <p><blockquote><pre>
   * escaped       = "%" hex hex
   * </pre></blockquote><p>
   * As a character set this is the union of the percent sign and the hex
   * digits; the "%" hex hex ordering is not expressed by the set itself.
   */
  protected static final BitSet escaped = new BitSet(256);
  // Static initializer for escaped
  static {
      escaped.or(percent);
      escaped.or(hex);
  }
  /**
   * BitSet holding the nine mark characters.
   * <p><blockquote><pre>
   * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
   *                 "(" | ")"
   * </pre></blockquote><p>
   */
  protected static final BitSet mark = new BitSet(256);
  // Static initializer for mark
  static {
      final char[] members = { '-', '_', '.', '!', '~', '*', '\'', '(', ')' };
      for (int i = 0; i < members.length; i++) {
          mark.set(members[i]);
      }
  }
  /**
   * Data characters that are allowed in a URI but do not have a reserved
   * purpose are called unreserved.
   * <p><blockquote><pre>
   * unreserved    = alphanum | mark
   * </pre></blockquote><p>
   */
  protected static final BitSet unreserved = new BitSet(256);
  // Static initializer for unreserved: union of alphanum and mark
  static {
      unreserved.or(alphanum);
      unreserved.or(mark);
  }
  /**
   * BitSet holding the ten reserved characters.
   * <p><blockquote><pre>
   * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
   *                 "$" | ","
   * </pre></blockquote><p>
   */
  protected static final BitSet reserved = new BitSet(256);
  // Static initializer for reserved
  static {
      final char[] members = { ';', '/', '?', ':', '@', '&', '=', '+', '$', ',' };
      for (int i = 0; i < members.length; i++) {
          reserved.set(members[i]);
      }
  }
  /**
   * BitSet for uric.
   * <p><blockquote><pre>
   * uric          = reserved | unreserved | escaped
   * </pre></blockquote><p>
   */
  protected static final BitSet uric = new BitSet(256);
  // Static initializer for uric: union of the three sets named in the grammar
  static {
      uric.or(reserved);
      uric.or(unreserved);
      uric.or(escaped);
  }
  /**
   * BitSet for fragment (alias for uric).
   * <p><blockquote><pre>
   * fragment      = *uric
   * </pre></blockquote><p>
   * This is the same BitSet instance as <code>uric</code>, not a copy.
   */
  protected static final BitSet fragment = uric;
  /**
   * BitSet for query (alias for uric).
   * <p><blockquote><pre>
   * query         = *uric
   * </pre></blockquote><p>
   * This is the same BitSet instance as <code>uric</code>, not a copy.
   */
  protected static final BitSet query = uric;
  /**
   * BitSet for the characters allowed in a path segment.
   * <p><blockquote><pre>
   * pchar         = unreserved | escaped |
   *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
   * </pre></blockquote><p>
   */
  protected static final BitSet pchar = new BitSet(256);
  // Static initializer for pchar
  static {
      final char[] extras = { ':', '@', '&', '=', '+', '$', ',' };
      for (int i = 0; i < extras.length; i++) {
          pchar.set(extras[i]);
      }
      pchar.or(unreserved);
      pchar.or(escaped);
  }
  /**
   * BitSet for param (alias for pchar).
   * <p><blockquote><pre>
   * param         = *pchar
   * </pre></blockquote><p>
   * This is the same BitSet instance as <code>pchar</code>, not a copy.
   */
  protected static final BitSet param = pchar;
  /**
   * BitSet for segment.
   * <p><blockquote><pre>
   * segment       = *pchar *( ";" param )
   * </pre></blockquote><p>
   * As a character set: every pchar character plus ";".
   */
  protected static final BitSet segment = new BitSet(256);
  // Static initializer for segment
  static {
      segment.or(pchar);
      segment.set(';');
      // param is declared as an alias of pchar, so this adds no new bits;
      // it mirrors the grammar rule.
      segment.or(param);
  }
  /**
   * BitSet for path segments.
   * <p><blockquote><pre>
   * path_segments = segment *( "/" segment )
   * </pre></blockquote><p>
   * As a character set: every segment character plus "/".
   */
  protected static final BitSet path_segments = new BitSet(256);
  // Static initializer for path_segments
  static {
      path_segments.set('/');
      path_segments.or(segment);
  }
  /**
   * URI absolute path.
   * <p><blockquote><pre>
   * abs_path      = "/"  path_segments
   * </pre></blockquote><p>
   */
  protected static final BitSet abs_path = new BitSet(256);
  // Static initializer for abs_path
  static {
      // path_segments already contains '/'; the explicit set mirrors the
      // grammar rule.
      abs_path.set('/');
      abs_path.or(path_segments);
  }
  /**
   * URI bitset for encoding typical non-slash characters.
   * <p><blockquote><pre>
   * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
   *                 "&amp;" | "=" | "+" | "$" | ","
   * </pre></blockquote><p>
   * This is uric without the slash: every reserved character except "/" is
   * set explicitly below.
   */
  protected static final BitSet uric_no_slash = new BitSet(256);
  // Static initializer for uric_no_slash
  static {
      uric_no_slash.or(unreserved);
      uric_no_slash.or(escaped);
      uric_no_slash.set(';');
      uric_no_slash.set('?');
      // ':' is part of the grammar rule above; ';' used to be set a second
      // time here instead, leaving the colon out of the set.
      uric_no_slash.set(':');
      uric_no_slash.set('@');
      uric_no_slash.set('&');
      uric_no_slash.set('=');
      uric_no_slash.set('+');
      uric_no_slash.set('$');
      uric_no_slash.set(',');
  }
  /**
   * URI bitset that combines uric_no_slash and uric.
   * <p><blockquote><pre>
   * opaque_part   = uric_no_slash *uric
   * </pre></blockquote><p>
   * As a character set this is the plain union of the two, so it also
   * contains "/" even though the grammar forbids it as the first character.
   */
  protected static final BitSet opaque_part = new BitSet(256);
  // Static initializer for opaque_part
  static {
      // it's generous. because first character must not include a slash
      opaque_part.or(uric_no_slash);
      opaque_part.or(uric);
  }
  /**
   * URI bitset that combines absolute path and opaque part.
   * <p><blockquote><pre>
   * path          = [ abs_path | opaque_part ]
   * </pre></blockquote><p>
   */
  protected static final BitSet path = new BitSet(256);
  // Static initializer for path: union of abs_path and opaque_part
  static {
      path.or(abs_path);
      path.or(opaque_part);
  }
  /**
   * Port, a logical alias for digit.
   * <p>
   * This field refers to the very same <code>BitSet</code> instance as
   * <code>digit</code>; it is not a copy, so a modification made through
   * either name is visible through the other.
   */
  protected static final BitSet port = digit;
  /**
   * Characters that may appear in an IPv4 address: the digits and the dot.
   * <p><blockquote><pre>
   * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
   * </pre></blockquote><p>
   */
  protected static final BitSet IPv4address = new BitSet(256);
  // Populate IPv4address.
  static {
    IPv4address.set('.');
    IPv4address.or(digit);
  }
  /**
   * Characters that may appear in an IPv6 address (RFC 2373).
   * <p><blockquote><pre>
   * IPv6address = hexpart [ ":" IPv4address ]
   * </pre></blockquote><p>
   * The set is the union of <code>hex</code> (standing in for hexpart),
   * the colon, and the <code>IPv4address</code> characters.
   */
  protected static final BitSet IPv6address = new BitSet(256);
  // Populate IPv6address.
  static {
    IPv6address.or(IPv4address);
    IPv6address.or(hex); // hexpart
    IPv6address.set(':');
  }
  /**
   * Characters that may appear in a bracketed IPv6 reference
   * (RFC 2732, RFC 2373).
   * <p><blockquote><pre>
   * IPv6reference = "[" IPv6address "]"
   * </pre></blockquote><p>
   * The set is the <code>IPv6address</code> characters plus the two square
   * brackets.
   */
  protected static final BitSet IPv6reference = new BitSet(256);
  // Populate IPv6reference.
  static {
    IPv6reference.or(IPv6address);
    final String brackets = "[]";
    for (int i = 0; i < brackets.length(); i++) {
      IPv6reference.set(brackets.charAt(i));
    }
  }
  /**
   * Characters that may appear in a top-level domain label.
   * <p><blockquote><pre>
   * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
   * </pre></blockquote><p>
   * The set is <code>alphanum</code> plus the hyphen; positional rules of
   * the production are not captured by the set.
   */
  protected static final BitSet toplabel = new BitSet(256);
  // Populate toplabel.
  static {
    toplabel.set('-');
    toplabel.or(alphanum);
  }
  /**
   * BitSet for domainlabel.
   * <p><blockquote><pre>
   * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
   * </pre></blockquote><p>
   * This field refers to the very same <code>BitSet</code> instance as
   * <code>toplabel</code>; it is not a copy, so a modification made through
   * either name is visible through the other.
   */
  protected static final BitSet domainlabel = toplabel;
  /**
   * Characters that may appear in a host name.
   * <p><blockquote><pre>
   * hostname = *( domainlabel "." ) toplabel [ "." ]
   * </pre></blockquote><p>
   * The set is the <code>toplabel</code> characters plus the dot.
   */
  protected static final BitSet hostname = new BitSet(256);
  // Populate hostname.
  static {
    hostname.set('.');
    // domainlabel is the same object as toplabel, so one union covers both.
    hostname.or(toplabel);
  }
  /**
   * Characters that may appear in a host.
   * <p><blockquote><pre>
   * host = hostname | IPv4address | IPv6reference
   * </pre></blockquote><p>
   * The set is the union of <code>hostname</code> and
   * <code>IPv6reference</code>.
   */
  protected static final BitSet host = new BitSet(256);
  // Populate host.
  static {
    // IPv4address is not added separately: IPv6reference contains
    // IPv6address, which in turn contains IPv4address.
    host.or(IPv6reference);
    host.or(hostname);
  }
  /**
   * Characters that may appear in a host with an optional port.
   * <p><blockquote><pre>
   * hostport = host [ ":" port ]
   * </pre></blockquote><p>
   * The set is the union of <code>host</code>, the colon and
   * <code>port</code>.
   */
  protected static final BitSet hostport = new BitSet(256);
  // Populate hostport.
  static {
    hostport.or(port);
    hostport.or(host);
    hostport.set(':');
  }
  /**
   * Characters that may appear in the userinfo component.
   * <p><blockquote><pre>
   * userinfo = *( unreserved | escaped |
   * ";" | ":" | "&" | "=" | "+" | "$" | "," )
   * </pre></blockquote><p>
   */
  protected static final BitSet userinfo = new BitSet(256);
  // Populate userinfo.
  static {
    userinfo.or(unreserved);
    userinfo.or(escaped);
    // Punctuation explicitly permitted by the production.
    final String extras = ";:&=+$,";
    for (int i = 0; i < extras.length(); i++) {
      userinfo.set(extras.charAt(i));
    }
  }
  /**
   * Characters allowed inside an individual part of the userinfo component,
   * such as the user or the password.
   * <p>
   * Starts from <code>userinfo</code> and removes <code>';'</code>,
   * <code>':'</code>, <code>'@'</code>, <code>'?'</code> and
   * <code>'/'</code>.
   */
  public static final BitSet within_userinfo = new BitSet(256);
  // Populate within_userinfo.
  static {
    within_userinfo.or(userinfo);
    // These characters are reserved within authority.
    final String reservedHere = ";:@?/";
    for (int i = 0; i < reservedHere.length(); i++) {
      within_userinfo.clear(reservedHere.charAt(i));
    }
  }
  /**
   * Characters that may appear in the server component.
   * <p><blockquote><pre>
   * server = [ [ userinfo "@" ] hostport ]
   * </pre></blockquote><p>
   * The set is the union of <code>userinfo</code>, the at-sign and
   * <code>hostport</code>.
   */
  protected static final BitSet server = new BitSet(256);
  // Populate server.
  static {
    server.or(hostport);
    server.or(userinfo);
    server.set('@');
  }
  /**
   * Characters that may appear in a registry-based naming authority.
   * <p><blockquote><pre>
   * reg_name = 1*( unreserved | escaped | "$" | "," |
   * ";" | ":" | "@" | "&" | "=" | "+" )
   * </pre></blockquote><p>
   */
  protected static final BitSet reg_name = new BitSet(256);
  // Populate reg_name.
  static {
    reg_name.or(unreserved);
    reg_name.or(escaped);
    // Punctuation explicitly permitted by the production.
    final String extras = "$,;:@&=+";
    for (int i = 0; i < extras.length(); i++) {
      reg_name.set(extras.charAt(i));
    }
  }
  /**
   * Characters that may appear in the authority component.
   * <p><blockquote><pre>
   * authority = server | reg_name
   * </pre></blockquote><p>
   * The set is the union of <code>server</code> and <code>reg_name</code>.
   */
  protected static final BitSet authority = new BitSet(256);
  // Populate authority from both alternatives of the production.
  static {
    authority.or(reg_name);
    authority.or(server);
  }
  /**
   * Characters that may appear in a scheme name.
   * <p><blockquote><pre>
   * scheme = alpha *( alpha | digit | "+" | "-" | "." )
   * </pre></blockquote><p>
   * The set is <code>alpha</code>, <code>digit</code> and the three
   * punctuation characters; the "must start with alpha" rule is not
   * captured by the set.
   */
  protected static final BitSet scheme = new BitSet(256);
  // Populate scheme.
  static {
    scheme.or(alpha);
    scheme.or(digit);
    final String extras = "+-.";
    for (int i = 0; i < extras.length(); i++) {
      scheme.set(extras.charAt(i));
    }
  }
  /**
   * Characters that may appear in the first segment of a relative path.
   * <p><blockquote><pre>
   * rel_segment = 1*( unreserved | escaped |
   * ";" | "@" | "&" | "=" | "+" | "$" | "," )
   * </pre></blockquote><p>
   */
  protected static final BitSet rel_segment = new BitSet(256);
  // Populate rel_segment.
  static {
    rel_segment.or(unreserved);
    rel_segment.or(escaped);
    // Punctuation explicitly permitted by the production.
    final String extras = ";@&=+$,";
    for (int i = 0; i < extras.length(); i++) {
      rel_segment.set(extras.charAt(i));
    }
  }
  /**
   * Characters that may appear in a relative path.
   * <p><blockquote><pre>
   * rel_path = rel_segment [ abs_path ]
   * </pre></blockquote><p>
   * The set is the union of <code>rel_segment</code> and
   * <code>abs_path</code>.
   */
  protected static final BitSet rel_path = new BitSet(256);
  // Populate rel_path.
  static {
    rel_path.or(abs_path);
    rel_path.or(rel_segment);
  }
  /**
   * Characters that may appear in a network path.
   * <p><blockquote><pre>
   * net_path = "//" authority [ abs_path ]
   * </pre></blockquote><p>
   * The set is the union of the slash, <code>authority</code> and
   * <code>abs_path</code>.
   */
  protected static final BitSet net_path = new BitSet(256);
  // Populate net_path.
  static {
    net_path.or(abs_path);
    net_path.or(authority);
    net_path.set('/');
  }
  /**
   * Characters that may appear in a hierarchical part.
   * <p><blockquote><pre>
   * hier_part = ( net_path | abs_path ) [ "?" query ]
   * </pre></blockquote><p>
   * The set is the union of <code>net_path</code>, <code>abs_path</code>
   * and <code>query</code>.
   */
  protected static final BitSet hier_part = new BitSet(256);
  // Populate hier_part.
  static {
    hier_part.or(query);
    hier_part.or(abs_path);
    hier_part.or(net_path);
    // '?' is not set explicitly: it is already included through the sets
    // above.
  }
  /**
   * Characters that may appear in a relative URI.
   * <p><blockquote><pre>
   * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
   * </pre></blockquote><p>
   * The set is the union of <code>net_path</code>, <code>abs_path</code>,
   * <code>rel_path</code> and <code>query</code>.
   */
  protected static final BitSet relativeURI = new BitSet(256);
  // Populate relativeURI.
  static {
    relativeURI.or(query);
    relativeURI.or(rel_path);
    relativeURI.or(abs_path);
    relativeURI.or(net_path);
    // '?' is not set explicitly: it is already included through the sets
    // above.
  }
  /**
   * Characters that may appear in an absolute URI.
   * <p><blockquote><pre>
   * absoluteURI = scheme ":" ( hier_part | opaque_part )
   * </pre></blockquote><p>
   * The set is the union of <code>scheme</code>, the colon,
   * <code>hier_part</code> and <code>opaque_part</code>.
   */
  protected static final BitSet absoluteURI = new BitSet(256);
  // Populate absoluteURI.
  static {
    absoluteURI.or(opaque_part);
    absoluteURI.or(hier_part);
    absoluteURI.or(scheme);
    absoluteURI.set(':');
  }
  /**
   * Characters that may appear in a URI reference.
   * <p><blockquote><pre>
   * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
   * </pre></blockquote><p>
   * The set is the union of <code>absoluteURI</code>,
   * <code>relativeURI</code>, the number sign and <code>fragment</code>.
   */
  protected static final BitSet URI_reference = new BitSet(256);
  // Populate URI_reference.
  static {
    URI_reference.or(fragment);
    URI_reference.or(relativeURI);
    URI_reference.or(absoluteURI);
    URI_reference.set('#');
  }
  1207. // ---------------------------- Characters disallowed within the URI syntax
  // The excluded US-ASCII characters are grouped as control, space, delims and unwise
  /**
   * The US-ASCII control characters: code points 0x00 through 0x1F, plus
   * 0x7F.
   */
  public static final BitSet control = new BitSet(256);
  // Populate control.
  static {
    int code = 0x1F;
    while (code >= 0) {
      control.set(code);
      code--;
    }
    control.set(0x7F);
  }
  /**
   * The space character (code point 0x20) as a single-member set.
   */
  public static final BitSet space = new BitSet(256);
  // Populate space.
  static {
    space.set(' ');
  }
  /**
   * The delimiter characters: <code>&lt;</code>, <code>&gt;</code>,
   * <code>#</code>, <code>%</code> and the double quote.
   */
  public static final BitSet delims = new BitSet(256);
  // Populate delims.
  static {
    final String members = "<>#%\"";
    for (int i = 0; i < members.length(); i++) {
      delims.set(members.charAt(i));
    }
  }
  /**
   * The "unwise" characters: the curly braces, the vertical bar, the
   * backslash, the caret, the square brackets and the backquote.
   */
  public static final BitSet unwise = new BitSet(256);
  // Populate unwise.
  static {
    final String members = "{}|\\^[]`";
    for (int i = 0; i < members.length(); i++) {
      unwise.set(members.charAt(i));
    }
  }
  /**
   * Characters disallowed in rel_path before escaping: every member of
   * <code>uric</code> that is not a member of <code>rel_path</code>.
   */
  public static final BitSet disallowed_rel_path = new BitSet(256);
  // Populate disallowed_rel_path.
  static {
    // Compute the set difference on a scratch copy, then publish it.
    final BitSet difference = (BitSet) uric.clone();
    difference.andNot(rel_path);
    disallowed_rel_path.or(difference);
  }
  /**
   * Characters disallowed in opaque_part before escaping: every member of
   * <code>uric</code> that is not a member of <code>opaque_part</code>.
   */
  public static final BitSet disallowed_opaque_part = new BitSet(256);
  // Populate disallowed_opaque_part.
  static {
    // Compute the set difference on a scratch copy, then publish it.
    final BitSet difference = (BitSet) uric.clone();
    difference.andNot(opaque_part);
    disallowed_opaque_part.or(difference);
  }
  1273. // ----------------------- Characters allowed within and for each component
  /**
   * Characters allowed for the authority component: the
   * <code>authority</code> set with <code>'%'</code> removed.
   */
  public static final BitSet allowed_authority = new BitSet(256);
  // Populate allowed_authority.
  static {
    // Copy the grammar set first; the removal must come afterwards.
    allowed_authority.or(authority);
    allowed_authority.clear('%');
  }
  /**
   * Characters allowed for the opaque_part: the <code>opaque_part</code>
   * set with <code>'%'</code> removed.
   */
  public static final BitSet allowed_opaque_part = new BitSet(256);
  // Populate allowed_opaque_part.
  static {
    // Copy the grammar set first; the removal must come afterwards.
    allowed_opaque_part.or(opaque_part);
    allowed_opaque_part.clear('%');
  }
  /**
   * Characters allowed for the reg_name: the <code>reg_name</code> set with
   * <code>'%'</code> removed.
   */
  public static final BitSet allowed_reg_name = new BitSet(256);
  // Populate allowed_reg_name.
  static {
    // Copy the grammar set first; the removal must come afterwards.
    allowed_reg_name.or(reg_name);
    allowed_reg_name.clear('%');
  }
  /**
   * Characters allowed for the userinfo component: the
   * <code>userinfo</code> set with <code>'%'</code> removed.
   */
  public static final BitSet allowed_userinfo = new BitSet(256);
  // Populate allowed_userinfo.
  static {
    // Copy the grammar set first; the removal must come afterwards.
    allowed_userinfo.or(userinfo);
    allowed_userinfo.clear('%');
  }
  /**
   * Characters allowed within the userinfo component: the
   * <code>within_userinfo</code> set with <code>'%'</code> removed.
   */
  public static final BitSet allowed_within_userinfo = new BitSet(256);
  // Populate allowed_within_userinfo.
  static {
    // Copy the base set first; the removal must come afterwards.
    allowed_within_userinfo.or(within_userinfo);
    allowed_within_userinfo.clear('%');
  }
  /**
   * Characters allowed for the IPv6reference component: the
   * <code>IPv6reference</code> set with the enclosing <code>'['</code> and
   * <code>']'</code> removed.
   */
  public static final BitSet allowed_IPv6reference = new BitSet(256);
  // Populate allowed_IPv6reference.
  static {
    allowed_IPv6reference.or(IPv6reference);
    // Drop the two brackets only, not a whole character class.
    final String brackets = "[]";
    for (int i = 0; i < brackets.length(); i++) {
      allowed_IPv6reference.clear(brackets.charAt(i));
    }
  }
  /**
   * Characters allowed for the host component: the union of
   * <code>hostname</code> and <code>allowed_IPv6reference</code>.
   * Using <code>allowed_IPv6reference</code> rather than
   * <code>IPv6reference</code> keeps <code>'['</code> and <code>']'</code>
   * from being contributed by the IPv6 set.
   */
  public static final BitSet allowed_host = new BitSet(256);
  // Populate allowed_host.
  static {
    allowed_host.or(allowed_IPv6reference);
    allowed_host.or(hostname);
  }
  /**
   * Characters allowed within the authority component: the union of
   * <code>server</code> and <code>reg_name</code>, with <code>';'</code>,
   * <code>':'</code>, <code>'@'</code>, <code>'?'</code> and
   * <code>'/'</code> removed.
   */
  public static final BitSet allowed_within_authority = new BitSet(256);
  // Populate allowed_within_authority.
  static {
    allowed_within_authority.or(reg_name);
    allowed_within_authority.or(server);
    // Removals run after both unions so neither can reintroduce them.
    final String removed = ";:@?/";
    for (int i = 0; i < removed.length(); i++) {
      allowed_within_authority.clear(removed.charAt(i));
    }
  }
  /**
   * Characters allowed for the abs_path: the <code>abs_path</code> set
   * minus every member of <code>percent</code>.
   */
  public static final BitSet allowed_abs_path = new BitSet(256);
  // Populate allowed_abs_path.
  static {
    // No explicit set('/') is needed; per the original author the slash is
    // already included via abs_path.
    allowed_abs_path.or(abs_path);
    allowed_abs_path.andNot(percent);
  }
  /**
   * Characters allowed for the rel_path: the <code>rel_path</code> set with
   * <code>'%'</code> removed.
   */
  public static final BitSet allowed_rel_path = new BitSet(256);
  // Populate allowed_rel_path.
  static {
    // Copy the grammar set first; the removal must come afterwards.
    allowed_rel_path.or(rel_path);
    allowed_rel_path.clear('%');
  }
  /**
   * Characters allowed within the path: the <code>abs_path</code> set with
   * <code>'/'</code>, <code>';'</code>, <code>'='</code> and
   * <code>'?'</code> removed.
   */
  public static final BitSet allowed_within_path = new BitSet(256);
  // Populate allowed_within_path.
  static {
    allowed_within_path.or(abs_path);
    final String removed = "/;=?";
    for (int i = 0; i < removed.length(); i++) {
      allowed_within_path.clear(removed.charAt(i));
    }
  }
  /**
   * Characters allowed for the query component: the <code>uric</code> set
   * with <code>'%'</code> removed.
   */
  public static final BitSet allowed_query = new BitSet(256);
  // Populate allowed_query.
  static {
    // Copy the grammar set first; the removal must come afterwards.
    allowed_query.or(uric);
    allowed_query.clear('%');
  }
  /**
   * Characters allowed within the query component: the
   * <code>allowed_query</code> set minus every member of
   * <code>reserved</code>.
   */
  public static final BitSet allowed_within_query = new BitSet(256);
  // Populate allowed_within_query.
  static {
    // Compute the set difference on a scratch copy, then publish it.
    final BitSet difference = (BitSet) allowed_query.clone();
    difference.andNot(reserved); // excluded 'reserved'
    allowed_within_query.or(difference);
  }
  /**
   * Characters allowed for the fragment component: the <code>uric</code>
   * set with <code>'%'</code> removed.
   */
  public static final BitSet allowed_fragment = new BitSet(256);
  // Populate allowed_fragment.
  static {
    // Copy the grammar set first; the removal must come afterwards.
    allowed_fragment.or(uric);
    allowed_fragment.clear('%');
  }
  1415. // ------------------------------------------- Flags for this URI-reference
  // TODO: Figure out what all these variables are for and provide javadoc
  // NOTE(review): each flag below is named after a grammar production;
  // presumably it records whether this URI matched that production, but
  // the code that sets and reads the flags is not visible here - confirm.
  // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  // absoluteURI = scheme ":" ( hier_part | opaque_part )
  /** Flag for the hier_part alternative of absoluteURI. */
  protected boolean _is_hier_part;
  /** Flag for the opaque_part alternative of absoluteURI. */
  protected boolean _is_opaque_part;
  // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
  // hier_part = ( net_path | abs_path ) [ "?" query ]
  /** Flag for the net_path alternative. */
  protected boolean _is_net_path;
  /** Flag for the abs_path alternative. */
  protected boolean _is_abs_path;
  /** Flag for the rel_path alternative (relativeURI only). */
  protected boolean _is_rel_path;
  1426. // net_path = "//" authority [ abs_path ]