001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *   http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.internal;
018
019import java.io.*;
020import java.nio.*;
021import java.nio.charset.*;
022import java.util.*;
023
024/**
025 * {@link InputStream} implementation that reads a character stream from a {@link Reader}
026 * and transforms it to a byte stream using a specified charset encoding. The stream
027 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
028 * encodings supported by the JRE are handled correctly. In particular for charsets such as
029 * UTF-16, the implementation ensures that one and only one byte order marker
030 * is produced.
031 * <p>
032 * Since in general it is not possible to predict the number of characters to be read from the
033 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
034 * the {@link Reader} are buffered. There is therefore no well defined correlation
035 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
036 * This also implies that in general there is no need to wrap the underlying {@link Reader}
037 * in a {@link java.io.BufferedReader}.
038 * <p>
039 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
040 * in the following example, reading from {@code in2} would return the same byte
041 * sequence as reading from {@code in} (provided that the initial byte sequence is legal
042 * with respect to the charset encoding):
043 * <pre>
044 * InputStream in = ...
045 * Charset cs = ...
046 * InputStreamReader reader = new InputStreamReader(in, cs);
047 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
048 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
049 * except that the control flow is reversed: both classes transform a character stream
050 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
051 * while {@link ReaderInputStream} pulls it from the underlying stream.
052 * <p>
053 * Note that while there are use cases where there is no alternative to using
054 * this class, very often the need to use this class is an indication of a flaw
055 * in the design of the code. This class is typically used in situations where an existing
056 * API only accepts an {@link InputStream}, but where the most natural way to produce the data
057 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
058 * where this problem may appear is when implementing the {@code javax.activation.DataSource}
059 * interface from the Java Activation Framework.
060 * <p>
061 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
062 * read operation will block or not, it is not possible to provide a meaningful
063 * implementation of the {@link InputStream#available()} method. A call to this method
064 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
065 * <p>
066 *
067 * <h5 class='section'>Notes:</h5><ul>
068 *    <li class='warn'>This class is not thread safe and is typically discarded after one use.
069 * </ul>
070 *
071 * <h5 class='section'>See Also:</h5><ul>
072 * </ul>
073 */
public class ReaderInputStream extends InputStream {
   private static final int DEFAULT_BUFFER_SIZE = 1024;

   /**
    * Smallest input buffer that can always make progress: it must be able to hold
    * a complete surrogate pair, otherwise a dangling high surrogate fills the
    * buffer and the reader can never supply the matching low surrogate.
    */
   private static final int MIN_BUFFER_SIZE = 2;

   /** Capacity in bytes of the intermediate output buffer. */
   private static final int OUTPUT_BUFFER_SIZE = 128;

   private final Reader reader;
   private final CharsetEncoder encoder;

   /**
    * CharBuffer used as input for the encoder. It should be reasonably
    * large as we read data from the underlying Reader into this buffer.
    */
   private final CharBuffer encoderIn;

   /**
    * ByteBuffer used as output for the encoder. This buffer can be small
    * as it is only used to transfer data from the encoder to the
    * buffer provided by the caller.
    */
   private final ByteBuffer encoderOut;

   /** Result of the most recent encode/flush call, or <code>null</code> before the first one. */
   private CoderResult lastCoderResult;

   /** Set once the underlying reader has reported end of stream. */
   private boolean endOfInput;

   /**
    * Set once the encoder has been flushed successfully. After that point the
    * encoder must not be invoked again (calling <code>encode</code> on a flushed
    * encoder is an illegal state), so {@link #fillBuffer()} becomes a no-op.
    */
   private boolean flushed;

   /**
    * Construct a new {@link ReaderInputStream} with a default input buffer size of
    * <c>1024</c> characters.
    *
    * @param reader the target {@link Reader}
    * @param encoder the charset encoder
    * @since 2.1
    */
   public ReaderInputStream(final Reader reader, final CharsetEncoder encoder) {
      this(reader, encoder, DEFAULT_BUFFER_SIZE);
   }

   /**
    * Construct a new {@link ReaderInputStream}.
    *
    * @param reader the target {@link Reader}
    * @param encoder the charset encoder
    * @param bufferSize the size of the input buffer in number of characters; sizes of
    *     <c>0</c> or <c>1</c> are raised to <c>2</c> so that a surrogate pair always fits
    * @throws IllegalArgumentException if <c>bufferSize</c> is negative
    */
   public ReaderInputStream(final Reader reader, final CharsetEncoder encoder, final int bufferSize) {
      this.reader = reader;
      this.encoder = encoder;
      // Negative sizes are passed through unchanged so that CharBuffer.allocate keeps
      // rejecting them; only the degenerate sizes 0 and 1 (which could never make
      // progress) are raised to the minimum.
      final int capacity = bufferSize < 0 ? bufferSize : Math.max(bufferSize, MIN_BUFFER_SIZE);
      this.encoderIn = CharBuffer.allocate(capacity);
      this.encoderIn.flip(); // Start out empty and in read mode. (Fixes Java 11 issue.)
      this.encoderOut = ByteBuffer.allocate(OUTPUT_BUFFER_SIZE);
      this.encoderOut.flip(); // Start out empty and in read mode. (Fixes Java 11 issue.)
   }

   /**
    * Construct a new {@link ReaderInputStream}.
    *
    * <p>
    * The encoder created from the charset replaces malformed input and unmappable
    * characters with the charset's replacement bytes.
    *
    * @param reader the target {@link Reader}
    * @param charset the charset encoding
    * @param bufferSize the size of the input buffer in number of characters
    */
   public ReaderInputStream(final Reader reader, final Charset charset, final int bufferSize) {
      this(reader,
          charset.newEncoder()
               .onMalformedInput(CodingErrorAction.REPLACE)
               .onUnmappableCharacter(CodingErrorAction.REPLACE),
          bufferSize);
   }

   /**
    * Construct a new {@link ReaderInputStream} with a default input buffer size of
    * <c>1024</c> characters.
    *
    * @param reader the target {@link Reader}
    * @param charset the charset encoding
    */
   public ReaderInputStream(final Reader reader, final Charset charset) {
      this(reader, charset, DEFAULT_BUFFER_SIZE);
   }

   /**
    * Construct a new {@link ReaderInputStream}.
    *
    * @param reader the target {@link Reader}
    * @param charsetName the name of the charset encoding
    * @param bufferSize the size of the input buffer in number of characters
    */
   public ReaderInputStream(final Reader reader, final String charsetName, final int bufferSize) {
      this(reader, Charset.forName(charsetName), bufferSize);
   }

   /**
    * Construct a new {@link ReaderInputStream} with a default input buffer size of
    * <c>1024</c> characters.
    *
    * @param reader the target {@link Reader}
    * @param charsetName the name of the charset encoding
    */
   public ReaderInputStream(final Reader reader, final String charsetName) {
      this(reader, charsetName, DEFAULT_BUFFER_SIZE);
   }

   /**
    * Refills the output byte buffer: pulls more characters from the reader when the
    * encoder has consumed what it had, encodes them, and flushes the encoder once
    * the reader is exhausted.
    *
    * <p>
    * Callers only invoke this when the output buffer has been fully drained. On
    * return the output buffer is in read mode.
    *
    * @throws CharacterCodingException
    *        If the encoder reports malformed input or an unmappable character
    *        (only possible when the encoder's error action is <code>REPORT</code>)
    * @throws IOException
    *        If an I/O error occurs
    */
   private void fillBuffer() throws IOException {
      if (flushed) {
         // Everything has been encoded and flushed; touching the encoder again
         // would raise IllegalStateException.
         return;
      }
      if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
         encoderIn.compact();
         final int position = encoderIn.position();
         // We don't use Reader#read(CharBuffer) here because it is more efficient
         // to write directly to the underlying char array (the default implementation
         // copies data to a temporary char array).
         final int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
         if (c == -1) {
            endOfInput = true;
         } else {
            encoderIn.position(position + c);
         }
         encoderIn.flip();
      }
      encoderOut.compact();
      lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
      if (endOfInput && lastCoderResult.isUnderflow()) {
         // All input has been consumed: let stateful encoders emit their trailing
         // bytes (e.g. a shift sequence back to the initial state).
         lastCoderResult = encoder.flush(encoderOut);
         flushed = lastCoderResult.isUnderflow();
      }
      // Return to read mode before reporting errors so the buffer stays consistent.
      encoderOut.flip();
      if (lastCoderResult.isError()) {
         // Without this, an encoder configured with REPORT would return the same
         // error on every call and the read loops would never terminate.
         lastCoderResult.throwException();
      }
   }

   /**
    * Read the specified number of bytes into an array.
    *
    * @param array the byte array to read into
    * @param off the offset to start reading bytes into
    * @param len the number of bytes to read
    * @return the number of bytes read or <code>-1</code>
    *     if the end of the stream has been reached
    * @throws NullPointerException if <code>array</code> is <code>null</code>
    * @throws IndexOutOfBoundsException if <code>off</code> or <code>len</code> is negative,
    *     or the range does not fit inside <code>array</code>
    * @throws IOException if an I/O error occurs
    */
   @Override
   public int read(final byte[] array, int off, int len) throws IOException {
      Objects.requireNonNull(array, "array");
      // Expressed as a subtraction so that a huge off/len pair cannot overflow
      // int and slip past the check.
      if (len < 0 || off < 0 || len > array.length - off) {
         throw new IndexOutOfBoundsException("Array Size=" + array.length +
               ", offset=" + off + ", length=" + len);
      }
      if (len == 0) {
         return 0; // Always return 0 if len == 0
      }
      int read = 0;
      while (len > 0) {
         if (encoderOut.hasRemaining()) {
            final int c = Math.min(encoderOut.remaining(), len);
            encoderOut.get(array, off, c);
            off += c;
            len -= c;
            read += c;
         } else {
            fillBuffer();
            if (endOfInput && !encoderOut.hasRemaining()) {
               break;
            }
         }
      }
      return read == 0 && endOfInput ? -1 : read;
   }

   /**
    * Read the specified number of bytes into an array.
    *
    * @param b the byte array to read into
    * @return the number of bytes read or <code>-1</code>
    *     if the end of the stream has been reached
    * @throws IOException if an I/O error occurs
    */
   @Override
   public int read(final byte[] b) throws IOException {
      return read(b, 0, b.length);
   }

   /**
    * Read a single byte.
    *
    * @return either the byte read or <code>-1</code> if the end of the stream
    *     has been reached
    * @throws IOException if an I/O error occurs
    */
   @Override
   public int read() throws IOException {
      for (;;) {
         if (encoderOut.hasRemaining()) {
            return encoderOut.get() & 0xFF;
         }
         fillBuffer();
         if (endOfInput && !encoderOut.hasRemaining()) {
            return -1;
         }
      }
   }

   /**
    * Close the stream. This method will cause the underlying {@link Reader}
    * to be closed.
    * @throws IOException if an I/O error occurs
    */
   @Override
   public void close() throws IOException {
      reader.close();
   }
}