1. /*
  2. * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/ChunkedInputStream.java,v 1.22 2004/04/18 23:51:34 jsdever Exp $
  3. * $Revision: 1.22 $
  4. * $Date: 2004/04/18 23:51:34 $
  5. *
  6. * ====================================================================
  7. *
  8. * Copyright 2002-2004 The Apache Software Foundation
  9. *
  10. * Licensed under the Apache License, Version 2.0 (the "License");
  11. * you may not use this file except in compliance with the License.
  12. * You may obtain a copy of the License at
  13. *
  14. * http://www.apache.org/licenses/LICENSE-2.0
  15. *
  16. * Unless required by applicable law or agreed to in writing, software
  17. * distributed under the License is distributed on an "AS IS" BASIS,
  18. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19. * See the License for the specific language governing permissions and
  20. * limitations under the License.
  21. * ====================================================================
  22. *
  23. * This software consists of voluntary contributions made by many
  24. * individuals on behalf of the Apache Software Foundation. For more
  25. * information on the Apache Software Foundation, please see
  26. * <http://www.apache.org/>.
  27. *
  28. */
  29. package org.apache.commons.httpclient;
  30. import java.io.ByteArrayOutputStream;
  31. import java.io.IOException;
  32. import java.io.InputStream;
  33. import org.apache.commons.httpclient.util.EncodingUtil;
  34. import org.apache.commons.httpclient.util.ExceptionUtil;
  35. import org.apache.commons.logging.Log;
  36. import org.apache.commons.logging.LogFactory;
  37. /**
  38. * <p>Transparently coalesces chunks of a HTTP stream that uses
  39. * Transfer-Encoding chunked.</p>
  40. *
  41. * <p>Note that this class NEVER closes the underlying stream, even when close
  42. * gets called. Instead, it will read until the "end" of its chunking on close,
  43. * which allows for the seamless invocation of subsequent HTTP 1.1 calls, while
  44. * not requiring the client to remember to read the entire contents of the
  45. * response.</p>
  46. *
  47. * @author Ortwin Glück
  48. * @author Sean C. Sullivan
  49. * @author Martin Elwin
  50. * @author Eric Johnson
  51. * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
  52. * @author Michael Becke
  53. * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
  54. *
  55. * @since 2.0
  56. *
  57. */
  58. public class ChunkedInputStream extends InputStream {
  59. /** The inputstream that we're wrapping */
  60. private InputStream in;
  61. /** The chunk size */
  62. private int chunkSize;
  63. /** The current position within the current chunk */
  64. private int pos;
  65. /** True if we'are at the beginning of stream */
  66. private boolean bof = true;
  67. /** True if we've reached the end of stream */
  68. private boolean eof = false;
  69. /** True if this stream is closed */
  70. private boolean closed = false;
  71. /** The method that this stream came from */
  72. private HttpMethod method;
  73. /** Log object for this class. */
  74. private static final Log LOG = LogFactory.getLog(ChunkedInputStream.class);
  75. /**
  76. *
  77. *
  78. * @param in must be non-null
  79. * @param method must be non-null
  80. *
  81. * @throws IOException If an IO error occurs
  82. */
  83. public ChunkedInputStream(
  84. final InputStream in, final HttpMethod method) throws IOException {
  85. if (in == null) {
  86. throw new IllegalArgumentException("InputStream parameter may not be null");
  87. }
  88. if (method == null) {
  89. throw new IllegalArgumentException("HttpMethod parameter may not be null");
  90. }
  91. this.in = in;
  92. this.method = method;
  93. this.pos = 0;
  94. }
  95. /**
  96. * <p> Returns all the data in a chunked stream in coalesced form. A chunk
  97. * is followed by a CRLF. The method returns -1 as soon as a chunksize of 0
  98. * is detected.</p>
  99. *
  100. * <p> Trailer headers are read automcatically at the end of the stream and
  101. * can be obtained with the getResponseFooters() method.</p>
  102. *
  103. * @return -1 of the end of the stream has been reached or the next data
  104. * byte
  105. * @throws IOException If an IO problem occurs
  106. *
  107. * @see HttpMethod#getResponseFooters()
  108. */
  109. public int read() throws IOException {
  110. if (closed) {
  111. throw new IOException("Attempted read from closed stream.");
  112. }
  113. if (eof) {
  114. return -1;
  115. }
  116. if (pos >= chunkSize) {
  117. nextChunk();
  118. if (eof) {
  119. return -1;
  120. }
  121. }
  122. pos++;
  123. return in.read();
  124. }
  125. /**
  126. * Read some bytes from the stream.
  127. * @param b The byte array that will hold the contents from the stream.
  128. * @param off The offset into the byte array at which bytes will start to be
  129. * placed.
  130. * @param len the maximum number of bytes that can be returned.
  131. * @return The number of bytes returned or -1 if the end of stream has been
  132. * reached.
  133. * @see java.io.InputStream#read(byte[], int, int)
  134. * @throws IOException if an IO problem occurs.
  135. */
  136. public int read (byte[] b, int off, int len) throws IOException {
  137. if (closed) {
  138. throw new IOException("Attempted read from closed stream.");
  139. }
  140. if (eof) {
  141. return -1;
  142. }
  143. if (pos >= chunkSize) {
  144. nextChunk();
  145. if (eof) {
  146. return -1;
  147. }
  148. }
  149. len = Math.min(len, chunkSize - pos);
  150. int count = in.read(b, off, len);
  151. pos += count;
  152. return count;
  153. }
  154. /**
  155. * Read some bytes from the stream.
  156. * @param b The byte array that will hold the contents from the stream.
  157. * @return The number of bytes returned or -1 if the end of stream has been
  158. * reached.
  159. * @see java.io.InputStream#read(byte[])
  160. * @throws IOException if an IO problem occurs.
  161. */
  162. public int read (byte[] b) throws IOException {
  163. return read(b, 0, b.length);
  164. }
  165. /**
  166. * Read the CRLF terminator.
  167. * @throws IOException If an IO error occurs.
  168. */
  169. private void readCRLF() throws IOException {
  170. int cr = in.read();
  171. int lf = in.read();
  172. if ((cr != '\r') || (lf != '\n')) {
  173. throw new IOException(
  174. "CRLF expected at end of chunk: " + cr + "/" + lf);
  175. }
  176. }
  177. /**
  178. * Read the next chunk.
  179. * @throws IOException If an IO error occurs.
  180. */
  181. private void nextChunk() throws IOException {
  182. if (!bof) {
  183. readCRLF();
  184. }
  185. chunkSize = getChunkSizeFromInputStream(in);
  186. bof = false;
  187. pos = 0;
  188. if (chunkSize == 0) {
  189. eof = true;
  190. parseTrailerHeaders();
  191. }
  192. }
  193. /**
  194. * Expects the stream to start with a chunksize in hex with optional
  195. * comments after a semicolon. The line must end with a CRLF: "a3; some
  196. * comment\r\n" Positions the stream at the start of the next line.
  197. *
  198. * @param in The new input stream.
  199. * @param required <tt>true<tt/> if a valid chunk must be present,
  200. * <tt>false<tt/> otherwise.
  201. *
  202. * @return the chunk size as integer
  203. *
  204. * @throws IOException when the chunk size could not be parsed
  205. */
  206. private static int getChunkSizeFromInputStream(final InputStream in)
  207. throws IOException {
  208. ByteArrayOutputStream baos = new ByteArrayOutputStream();
  209. // States: 0=normal, 1=\r was scanned, 2=inside quoted string, -1=end
  210. int state = 0;
  211. while (state != -1) {
  212. int b = in.read();
  213. if (b == -1) {
  214. throw new IOException("chunked stream ended unexpectedly");
  215. }
  216. switch (state) {
  217. case 0:
  218. switch (b) {
  219. case '\r':
  220. state = 1;
  221. break;
  222. case '\"':
  223. state = 2;
  224. /* fall through */
  225. default:
  226. baos.write(b);
  227. }
  228. break;
  229. case 1:
  230. if (b == '\n') {
  231. state = -1;
  232. } else {
  233. // this was not CRLF
  234. throw new IOException("Protocol violation: Unexpected"
  235. + " single newline character in chunk size");
  236. }
  237. break;
  238. case 2:
  239. switch (b) {
  240. case '\\':
  241. b = in.read();
  242. baos.write(b);
  243. break;
  244. case '\"':
  245. state = 0;
  246. /* fall through */
  247. default:
  248. baos.write(b);
  249. }
  250. break;
  251. default: throw new RuntimeException("assertion failed");
  252. }
  253. }
  254. //parse data
  255. String dataString = EncodingUtil.getAsciiString(baos.toByteArray());
  256. int separator = dataString.indexOf(';');
  257. dataString = (separator > 0)
  258. ? dataString.substring(0, separator).trim()
  259. : dataString.trim();
  260. int result;
  261. try {
  262. result = Integer.parseInt(dataString.trim(), 16);
  263. } catch (NumberFormatException e) {
  264. throw new IOException ("Bad chunk size: " + dataString);
  265. }
  266. return result;
  267. }
  268. /**
  269. * Reads and stores the Trailer headers.
  270. * @throws IOException If an IO problem occurs
  271. */
  272. private void parseTrailerHeaders() throws IOException {
  273. Header[] footers = null;
  274. try {
  275. footers = HttpParser.parseHeaders(in,
  276. method.getParams().getHttpElementCharset());
  277. } catch(HttpException e) {
  278. LOG.error("Error parsing trailer headers", e);
  279. IOException ioe = new IOException(e.getMessage());
  280. ExceptionUtil.initCause(ioe, e);
  281. throw ioe;
  282. }
  283. for (int i = 0; i < footers.length; i++) {
  284. method.addResponseFooter(footers[i]);
  285. }
  286. }
  287. /**
  288. * Upon close, this reads the remainder of the chunked message,
  289. * leaving the underlying socket at a position to start reading the
  290. * next response without scanning.
  291. * @throws IOException If an IO problem occurs.
  292. */
  293. public void close() throws IOException {
  294. if (!closed) {
  295. try {
  296. if (!eof) {
  297. exhaustInputStream(this);
  298. }
  299. } finally {
  300. eof = true;
  301. closed = true;
  302. }
  303. }
  304. }
  305. /**
  306. * Exhaust an input stream, reading until EOF has been encountered.
  307. *
  308. * <p>Note that this function is intended as a non-public utility.
  309. * This is a little weird, but it seemed silly to make a utility
  310. * class for this one function, so instead it is just static and
  311. * shared that way.</p>
  312. *
  313. * @param inStream The {@link InputStream} to exhaust.
  314. * @throws IOException If an IO problem occurs
  315. */
  316. static void exhaustInputStream(InputStream inStream) throws IOException {
  317. // read and discard the remainder of the message
  318. byte buffer[] = new byte[1024];
  319. while (inStream.read(buffer) >= 0) {
  320. ;
  321. }
  322. }
  323. }