001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *   http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.commons.io;
018
019import static org.apache.juneau.commons.utils.AssertionUtils.*;
020
021import java.io.*;
022import java.nio.*;
023import java.nio.charset.*;
024
025/**
026 * {@link InputStream} implementation that reads a character stream from a {@link Reader}
027 * and transforms it to a byte stream using a specified charset encoding. The stream
028 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
029 * encodings supported by the JRE are handled correctly. In particular for charsets such as
030 * UTF-16, the implementation ensures that one and only one byte order marker
031 * is produced.
032 * <p>
033 * Since in general it is not possible to predict the number of characters to be read from the
034 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
035 * the {@link Reader} are buffered. There is therefore no well defined correlation
036 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
037 * This also implies that in general there is no need to wrap the underlying {@link Reader}
038 * in a {@link java.io.BufferedReader}.
039 * <p>
040 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
041 * in the following example, reading from {@code in2} would return the same byte
042 * sequence as reading from {@code in} (provided that the initial byte sequence is legal
043 * with respect to the charset encoding):
044 * <pre>
045 * InputStream in = ...
046 * Charset cs = ...
047 * InputStreamReader reader = new InputStreamReader(in, cs);
048 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
049 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
050 * except that the control flow is reversed: both classes transform a character stream
051 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
052 * while {@link ReaderInputStream} pulls it from the underlying stream.
053 * <p>
054 * Note that while there are use cases where there is no alternative to using
055 * this class, very often the need to use this class is an indication of a flaw
056 * in the design of the code. This class is typically used in situations where an existing
057 * API only accepts an {@link InputStream}, but where the most natural way to produce the data
058 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
059 * where this problem may appear is when implementing the {@code javax.activation.DataSource}
060 * interface from the Java Activation Framework.
061 * <p>
062 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
063 * read operation will block or not, it is not possible to provide a meaningful
064 * implementation of the {@link InputStream#available()} method. A call to this method
065 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
066 * <p>
067 *
068 * <h5 class='section'>Notes:</h5><ul>
069 *    <li class='warn'>This class is not thread safe and is typically discarded after one use.
070 * </ul>
071 *
072 */
073public class ReaderInputStream extends InputStream {
074   private static final int DEFAULT_BUFFER_SIZE = 1024;
075
076   private final Reader reader;
077   private final CharsetEncoder encoder;
078
079   /**
080    * CharBuffer used as input for the decoder. It should be reasonably
081    * large as we read data from the underlying Reader into this buffer.
082    */
083   private final CharBuffer encoderIn;
084
085   /**
086    * ByteBuffer used as output for the decoder. This buffer can be small
087    * as it is only used to transfer data from the decoder to the
088    * buffer provided by the caller.
089    */
090   private final ByteBuffer encoderOut;
091
092   private CoderResult lastCoderResult;
093   private boolean endOfInput;
094
095   /**
096    * Construct a new {@link ReaderInputStream} with a default input buffer size of
097    * <c>1024</c> characters.
098    *
099    * @param reader the target {@link Reader}.  Must not be <jk>null</jk>.
100    * @param charset the charset encoding.  Must not be <jk>null</jk>.
101    */
102   public ReaderInputStream(Reader reader, Charset charset) {
103      this(reader, charset, DEFAULT_BUFFER_SIZE);
104   }
105
106   /**
107    * Construct a new {@link ReaderInputStream}.
108    *
109    * @param reader the target {@link Reader}.  Must not be <jk>null</jk>.
110    * @param charset the charset encoding.  Must not be <jk>null</jk>.
111    * @param bufferSize the size of the input buffer in number of characters.  Must be positive.
112    */
113   @SuppressWarnings("resource")
114   public ReaderInputStream(Reader reader, Charset charset, int bufferSize) {
115      // @formatter:off
116      this(assertArgNotNull("reader", reader),
117          assertArgNotNull("charset", charset).newEncoder()
118               .onMalformedInput(CodingErrorAction.REPLACE)
119               .onUnmappableCharacter(CodingErrorAction.REPLACE),
120          bufferSize
121      );
122      // @formatter:on
123   }
124
125   /**
126    * Construct a new {@link ReaderInputStream}.
127    *
128    * @param reader the target {@link Reader}.  Must not be <jk>null</jk>.
129    * @param encoder the charset encoder.  Must not be <jk>null</jk>.
130    * @since 2.1
131    */
132   public ReaderInputStream(Reader reader, CharsetEncoder encoder) {
133      this(reader, encoder, DEFAULT_BUFFER_SIZE);
134   }
135
136   /**
137    * Construct a new {@link ReaderInputStream}.
138    *
139    * @param reader the target {@link Reader}.  Must not be <jk>null</jk>.
140    * @param encoder the charset encoder.  Must not be <jk>null</jk>.
141    * @param bufferSize the size of the input buffer in number of characters.  Must be positive.
142    */
143   public ReaderInputStream(Reader reader, CharsetEncoder encoder, int bufferSize) {
144      this.reader = assertArgNotNull("reader", reader);
145      this.encoder = assertArgNotNull("encoder", encoder);
146      assertArg(bufferSize > 0, "Argument 'bufferSize' must be positive.");
147      this.encoderIn = CharBuffer.allocate(bufferSize);
148      this.encoderIn.flip(); // Fixes Java 11 issue.
149      this.encoderOut = ByteBuffer.allocate(128);
150      this.encoderOut.flip(); // Fixes Java 11 issue.
151   }
152
153   /**
154    * Construct a new {@link ReaderInputStream} with a default input buffer size of
155    * <c>1024</c> characters.
156    *
157    * @param reader the target {@link Reader}.  Must not be <jk>null</jk>.
158    * @param charsetName the name of the charset encoding.  Must not be <jk>null</jk>.
159    */
160   public ReaderInputStream(Reader reader, String charsetName) {
161      this(reader, charsetName, DEFAULT_BUFFER_SIZE);
162   }
163
164   /**
165    * Construct a new {@link ReaderInputStream}.
166    *
167    * @param reader the target {@link Reader}.  Must not be <jk>null</jk>.
168    * @param charsetName the name of the charset encoding.  Must not be <jk>null</jk>.
169    * @param bufferSize the size of the input buffer in number of characters.  Must be positive.
170    */
171   public ReaderInputStream(Reader reader, String charsetName, int bufferSize) {
172      this(reader, Charset.forName(assertArgNotNull("charsetName", charsetName)), bufferSize);
173   }
174
175   /**
176    * Close the stream. This method will cause the underlying {@link Reader}
177    * to be closed.
178    * @throws IOException if an I/O error occurs
179    */
180   @Override
181   public void close() throws IOException {
182      reader.close();
183   }
184
185   /**
186    * Read a single byte.
187    *
188    * @return either the byte read or <code>-1</code> if the end of the stream
189    *     has been reached
190    * @throws IOException if an I/O error occurs
191    */
192   @Override
193   public int read() throws IOException {
194      for (;;) {
195         if (encoderOut.hasRemaining()) {
196            return encoderOut.get() & 0xFF;
197         }
198         fillBuffer();
199         if (endOfInput && ! encoderOut.hasRemaining()) {
200            return -1;
201         }
202      }
203   }
204
205   /**
206    * Read the specified number of bytes into an array.
207    *
208    * @param b the byte array to read into
209    * @return the number of bytes read or <code>-1</code>
210    *     if the end of the stream has been reached
211    * @throws IOException if an I/O error occurs
212    */
213   @Override
214   public int read(byte[] b) throws IOException {
215      assertArgNotNull("b", b);
216      return read(b, 0, b.length);
217   }
218
219   /**
220    * Read the specified number of bytes into an array.
221    *
222    * @param array the byte array to read into
223    * @param off the offset to start reading bytes into
224    * @param len the number of bytes to read
225    * @return the number of bytes read or <code>-1</code>
226    *     if the end of the stream has been reached
227    * @throws IOException if an I/O error occurs
228    */
229   @Override
230   public int read(byte[] array, int off, int len) throws IOException {
231      assertArgNotNull("array", array);
232      if (len < 0 || off < 0 || (off + len) > array.length) {
233         throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
234      }
235      int read = 0;
236      if (len == 0) {
237         return 0; // Always return 0 if len == 0
238      }
239      while (len > 0) {
240         if (encoderOut.hasRemaining()) {
241            final int c = Math.min(encoderOut.remaining(), len);
242            encoderOut.get(array, off, c);
243            off += c;
244            len -= c;
245            read += c;
246         } else {
247            fillBuffer();
248            if (endOfInput && ! encoderOut.hasRemaining()) {
249               break;
250            }
251         }
252      }
253      return read == 0 && endOfInput ? -1 : read;
254   }
255
256   /**
257    * Fills the internal char buffer from the reader.
258    *
259    * @throws IOException
260    *        If an I/O error occurs
261    */
262   private void fillBuffer() throws IOException {
263      if (! endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
264         encoderIn.compact();
265         final int position = encoderIn.position();
266         // We don't use Reader#read(CharBuffer) here because it is more efficient
267         // to write directly to the underlying char array (the default implementation
268         // copies data to a temporary char array).
269         final int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
270         if (c == -1) {
271            endOfInput = true;
272         } else {
273            encoderIn.position(position + c);
274         }
275         encoderIn.flip();
276      }
277      encoderOut.compact();
278      lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
279      encoderOut.flip();
280   }
281}