001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *   http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.internal;
018
019import java.io.IOException;
020import java.io.InputStream;
021import java.io.Reader;
022import java.nio.*;
023import java.nio.charset.Charset;
024import java.nio.charset.CharsetEncoder;
025import java.nio.charset.CoderResult;
026import java.nio.charset.CodingErrorAction;
027import java.util.Objects;
028
/**
 * {@link InputStream} implementation that reads a character stream from a {@link Reader}
 * and transforms it to a byte stream using a specified charset encoding. The stream
 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
 * encodings supported by the JRE are handled correctly. In particular for charsets such as
 * UTF-16, the implementation ensures that one and only one byte order marker
 * is produced.
 * <p>
 * Since in general it is not possible to predict the number of characters to be read from the
 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
 * the {@link Reader} are buffered. There is therefore no well defined correlation
 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
 * This also implies that in general there is no need to wrap the underlying {@link Reader}
 * in a {@link java.io.BufferedReader}.
 * <p>
 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
 * in the following example, reading from {@code in2} would return the same byte
 * sequence as reading from {@code in} (provided that the initial byte sequence is legal
 * with respect to the charset encoding):
 * <pre>
 * InputStream in = ...
 * Charset cs = ...
 * InputStreamReader reader = new InputStreamReader(in, cs);
 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
 * except that the control flow is reversed: both classes transform a character stream
 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
 * while {@link ReaderInputStream} pulls it from the underlying stream.
 * <p>
 * Note that while there are use cases where there is no alternative to using
 * this class, very often the need to use this class is an indication of a flaw
 * in the design of the code. This class is typically used in situations where an existing
 * API only accepts an {@link InputStream}, but where the most natural way to produce the data
 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
 * where this problem may appear is when implementing the {@code javax.activation.DataSource}
 * interface from the Java Activation Framework.
 * <p>
 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
 * read operation will block or not, it is not possible to provide a meaningful
 * implementation of the {@link InputStream#available()} method. A call to this method
 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
 * <p>
 * The constructors taking a {@link Charset} or charset name replace malformed input and unmappable
 * characters with the encoder's replacement bytes. When a caller-supplied {@link CharsetEncoder}
 * is configured with {@link CodingErrorAction#REPORT}, a coding error is raised as a
 * {@link java.nio.charset.CharacterCodingException} once all bytes encoded before the error
 * have been returned.
 *
 * <h5 class='section'>Notes:</h5><ul>
 *    <li class='warn'>This class is not thread safe and is typically discarded after one use.
 * </ul>
 */
public class ReaderInputStream extends InputStream {
   private static final int DEFAULT_BUFFER_SIZE = 1024;

   private final Reader reader;
   private final CharsetEncoder encoder;

   /**
    * CharBuffer used as input for the encoder. It should be reasonably
    * large as we read data from the underlying Reader into this buffer.
    */
   private final CharBuffer encoderIn;

   /**
    * ByteBuffer used as output for the encoder. This buffer can be small
    * as it is only used to transfer data from the encoder to the
    * buffer provided by the caller.
    */
   private final ByteBuffer encoderOut;

   /** Result of the most recent encode/flush call; <jk>null</jk> until the first fill. */
   private CoderResult lastCoderResult;

   /** Set once the underlying reader has reported end of stream. */
   private boolean endOfInput;

   /** Set once the encoder has been completely flushed; no further bytes will be produced. */
   private boolean flushed;

   /**
    * Construct a new {@link ReaderInputStream}.
    *
    * @param reader the target {@link Reader}
    * @param encoder the charset encoder
    * @since 2.1
    */
   public ReaderInputStream(final Reader reader, final CharsetEncoder encoder) {
      this(reader, encoder, DEFAULT_BUFFER_SIZE);
   }

   /**
    * Construct a new {@link ReaderInputStream}.
    *
    * @param reader the target {@link Reader}
    * @param encoder the charset encoder
    * @param bufferSize the size of the input buffer in number of characters
    */
   public ReaderInputStream(final Reader reader, final CharsetEncoder encoder, final int bufferSize) {
      this.reader = reader;
      this.encoder = encoder;
      // Both buffers start out empty and in "read" mode (position == limit == 0).
      this.encoderIn = CharBuffer.allocate(bufferSize);
      ((Buffer)this.encoderIn).flip(); // Fixes Java 11 issue.
      this.encoderOut = ByteBuffer.allocate(128);
      ((Buffer)this.encoderOut).flip(); // Fixes Java 11 issue.
   }

   /**
    * Construct a new {@link ReaderInputStream}.
    *
    * <p>
    * Malformed input and unmappable characters are replaced rather than reported.
    *
    * @param reader the target {@link Reader}
    * @param charset the charset encoding
    * @param bufferSize the size of the input buffer in number of characters
    */
   public ReaderInputStream(final Reader reader, final Charset charset, final int bufferSize) {
      this(reader,
          charset.newEncoder()
               .onMalformedInput(CodingErrorAction.REPLACE)
               .onUnmappableCharacter(CodingErrorAction.REPLACE),
          bufferSize);
   }

   /**
    * Construct a new {@link ReaderInputStream} with a default input buffer size of
    * <c>1024</c> characters.
    *
    * @param reader the target {@link Reader}
    * @param charset the charset encoding
    */
   public ReaderInputStream(final Reader reader, final Charset charset) {
      this(reader, charset, DEFAULT_BUFFER_SIZE);
   }

   /**
    * Construct a new {@link ReaderInputStream}.
    *
    * @param reader the target {@link Reader}
    * @param charsetName the name of the charset encoding
    * @param bufferSize the size of the input buffer in number of characters
    */
   public ReaderInputStream(final Reader reader, final String charsetName, final int bufferSize) {
      this(reader, Charset.forName(charsetName), bufferSize);
   }

   /**
    * Construct a new {@link ReaderInputStream} with a default input buffer size of
    * <c>1024</c> characters.
    *
    * @param reader the target {@link Reader}
    * @param charsetName the name of the charset encoding
    */
   public ReaderInputStream(final Reader reader, final String charsetName) {
      this(reader, charsetName, DEFAULT_BUFFER_SIZE);
   }

   /**
    * Fills the internal byte buffer with freshly encoded data, reading more characters
    * from the reader first when the encoder has consumed everything it was given.
    *
    * <p>
    * Once the reader is exhausted the encoder is flushed so that any trailing bytes
    * it holds back are emitted. After a successful flush this method is a no-op.
    *
    * @throws IOException
    *        If an I/O error occurs, or if the encoder reports a malformed-input or
    *        unmappable-character error and no previously encoded bytes remain to be returned
    */
   private void fillBuffer() throws IOException {
      if (flushed) {
         // Calling encode() on a flushed encoder is illegal, and there is nothing left to produce.
         return;
      }
      if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
         encoderIn.compact();
         final int position = ((Buffer)encoderIn).position();
         // We don't use Reader#read(CharBuffer) here because it is more efficient
         // to write directly to the underlying char array (the default implementation
         // copies data to a temporary char array).
         final int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
         if (c == -1) {
            endOfInput = true;
         } else {
            ((Buffer)encoderIn).position(position+c);
         }
         ((Buffer)encoderIn).flip();
      }
      encoderOut.compact();
      lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
      if (endOfInput && lastCoderResult.isUnderflow()) {
         // All input has been encoded; let the encoder emit whatever it still holds back.
         // If the flush overflows, the next call re-enters here and tries again.
         lastCoderResult = encoder.flush(encoderOut);
         flushed = lastCoderResult.isUnderflow();
      }
      ((Buffer)encoderOut).flip();
      // Bytes encoded ahead of an error are handed out first; the error surfaces on the
      // following call, when encode() fails again without producing any output.
      if (lastCoderResult.isError() && !encoderOut.hasRemaining()) {
         lastCoderResult.throwException();
      }
   }

   /**
    * Read the specified number of bytes into an array.
    *
    * @param array the byte array to read into
    * @param off the offset to start reading bytes into
    * @param len the number of bytes to read
    * @return the number of bytes read or <code>-1</code>
    *     if the end of the stream has been reached
    * @throws IOException if an I/O error occurs
    * @throws NullPointerException if <c>array</c> is <jk>null</jk>
    * @throws IndexOutOfBoundsException if <c>off</c> or <c>len</c> is negative, or
    *     <c>off + len</c> exceeds the array length
    */
   @Override
   public int read(final byte[] array, int off, int len) throws IOException {
      Objects.requireNonNull(array, "array");
      // Expressed as a subtraction so that a large off + len cannot overflow and slip past the check.
      if (len < 0 || off < 0 || off > array.length - len) {
         throw new IndexOutOfBoundsException("Array Size=" + array.length +
               ", offset=" + off + ", length=" + len);
      }
      if (len == 0) {
         return 0; // Always return 0 if len == 0
      }
      int read = 0;
      while (len > 0) {
         if (encoderOut.hasRemaining()) {
            final int c = Math.min(encoderOut.remaining(), len);
            encoderOut.get(array, off, c);
            off += c;
            len -= c;
            read += c;
         } else {
            if (read > 0 && lastCoderResult != null && lastCoderResult.isError()) {
               // Return what was copied so far; the pending coding error is raised by the next read.
               break;
            }
            fillBuffer();
            if (endOfInput && !encoderOut.hasRemaining()) {
               break;
            }
         }
      }
      return read == 0 && endOfInput ? -1 : read;
   }

   /**
    * Read bytes into an array, filling as much of it as the stream can supply.
    *
    * @param b the byte array to read into
    * @return the number of bytes read or <code>-1</code>
    *     if the end of the stream has been reached
    * @throws IOException if an I/O error occurs
    */
   @Override
   public int read(final byte[] b) throws IOException {
      return read(b, 0, b.length);
   }

   /**
    * Read a single byte.
    *
    * @return either the byte read or <code>-1</code> if the end of the stream
    *     has been reached
    * @throws IOException if an I/O error occurs
    */
   @Override
   public int read() throws IOException {
      for (;;) {
         if (encoderOut.hasRemaining()) {
            return encoderOut.get() & 0xFF;
         }
         fillBuffer();
         if (endOfInput && !encoderOut.hasRemaining()) {
            return -1;
         }
      }
   }

   /**
    * Close the stream. This method will cause the underlying {@link Reader}
    * to be closed.
    * @throws IOException if an I/O error occurs
    */
   @Override
   public void close() throws IOException {
      reader.close();
   }
}