001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.juneau.internal; 018 019import java.io.IOException; 020import java.io.InputStream; 021import java.io.Reader; 022import java.nio.*; 023import java.nio.charset.Charset; 024import java.nio.charset.CharsetEncoder; 025import java.nio.charset.CoderResult; 026import java.nio.charset.CodingErrorAction; 027import java.util.Objects; 028 029/** 030 * {@link InputStream} implementation that reads a character stream from a {@link Reader} 031 * and transforms it to a byte stream using a specified charset encoding. The stream 032 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset 033 * encodings supported by the JRE are handled correctly. In particular for charsets such as 034 * UTF-16, the implementation ensures that one and only one byte order marker 035 * is produced. 036 * <p> 037 * Since in general it is not possible to predict the number of characters to be read from the 038 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from 039 * the {@link Reader} are buffered. There is therefore no well defined correlation 040 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}. 041 * This also implies that in general there is no need to wrap the underlying {@link Reader} 042 * in a {@link java.io.BufferedReader}. 043 * <p> 044 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader}; 045 * in the following example, reading from {@code in2} would return the same byte 046 * sequence as reading from {@code in} (provided that the initial byte sequence is legal 047 * with respect to the charset encoding): 048 * <pre> 049 * InputStream in = ... 050 * Charset cs = ... 051 * InputStreamReader reader = new InputStreamReader(in, cs); 052 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre> 053 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter}, 054 * except that the control flow is reversed: both classes transform a character stream 055 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream, 056 * while {@link ReaderInputStream} pulls it from the underlying stream. 057 * <p> 058 * Note that while there are use cases where there is no alternative to using 059 * this class, very often the need to use this class is an indication of a flaw 060 * in the design of the code. This class is typically used in situations where an existing 061 * API only accepts an {@link InputStream}, but where the most natural way to produce the data 062 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation 063 * where this problem may appear is when implementing the {@code javax.activation.DataSource} 064 * interface from the Java Activation Framework. 065 * <p> 066 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next 067 * read operation will block or not, it is not possible to provide a meaningful 068 * implementation of the {@link InputStream#available()} method. A call to this method 069 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}. 070 * <p> 071 * 072 * <h5 class='section'>Notes:</h5><ul> 073 * <li class='warn'>This class is not thread safe and is typically discarded after one use. 074 * </ul> 075 * 076 * <h5 class='section'>See Also:</h5><ul> 077 * </ul> 078 */ 079public class ReaderInputStream extends InputStream { 080 private static final int DEFAULT_BUFFER_SIZE = 1024; 081 082 private final Reader reader; 083 private final CharsetEncoder encoder; 084 085 /** 086 * CharBuffer used as input for the decoder. It should be reasonably 087 * large as we read data from the underlying Reader into this buffer. 088 */ 089 private final CharBuffer encoderIn; 090 091 /** 092 * ByteBuffer used as output for the decoder. This buffer can be small 093 * as it is only used to transfer data from the decoder to the 094 * buffer provided by the caller. 095 */ 096 private final ByteBuffer encoderOut; 097 098 private CoderResult lastCoderResult; 099 private boolean endOfInput; 100 101 /** 102 * Construct a new {@link ReaderInputStream}. 103 * 104 * @param reader the target {@link Reader} 105 * @param encoder the charset encoder 106 * @since 2.1 107 */ 108 public ReaderInputStream(final Reader reader, final CharsetEncoder encoder) { 109 this(reader, encoder, DEFAULT_BUFFER_SIZE); 110 } 111 112 /** 113 * Construct a new {@link ReaderInputStream}. 114 * 115 * @param reader the target {@link Reader} 116 * @param encoder the charset encoder 117 * @param bufferSize the size of the input buffer in number of characters 118 */ 119 public ReaderInputStream(final Reader reader, final CharsetEncoder encoder, final int bufferSize) { 120 this.reader = reader; 121 this.encoder = encoder; 122 this.encoderIn = CharBuffer.allocate(bufferSize); 123 ((Buffer)this.encoderIn).flip(); // Fixes Java 11 issue. 124 this.encoderOut = ByteBuffer.allocate(128); 125 ((Buffer)this.encoderOut).flip(); // Fixes Java 11 issue. 126 } 127 128 /** 129 * Construct a new {@link ReaderInputStream}. 130 * 131 * @param reader the target {@link Reader} 132 * @param charset the charset encoding 133 * @param bufferSize the size of the input buffer in number of characters 134 */ 135 public ReaderInputStream(final Reader reader, final Charset charset, final int bufferSize) { 136 this(reader, 137 charset.newEncoder() 138 .onMalformedInput(CodingErrorAction.REPLACE) 139 .onUnmappableCharacter(CodingErrorAction.REPLACE), 140 bufferSize); 141 } 142 143 /** 144 * Construct a new {@link ReaderInputStream} with a default input buffer size of 145 * <c>1024</c> characters. 146 * 147 * @param reader the target {@link Reader} 148 * @param charset the charset encoding 149 */ 150 public ReaderInputStream(final Reader reader, final Charset charset) { 151 this(reader, charset, DEFAULT_BUFFER_SIZE); 152 } 153 154 /** 155 * Construct a new {@link ReaderInputStream}. 156 * 157 * @param reader the target {@link Reader} 158 * @param charsetName the name of the charset encoding 159 * @param bufferSize the size of the input buffer in number of characters 160 */ 161 public ReaderInputStream(final Reader reader, final String charsetName, final int bufferSize) { 162 this(reader, Charset.forName(charsetName), bufferSize); 163 } 164 165 /** 166 * Construct a new {@link ReaderInputStream} with a default input buffer size of 167 * <c>1024</c> characters. 168 * 169 * @param reader the target {@link Reader} 170 * @param charsetName the name of the charset encoding 171 */ 172 public ReaderInputStream(final Reader reader, final String charsetName) { 173 this(reader, charsetName, DEFAULT_BUFFER_SIZE); 174 } 175 176 /** 177 * Fills the internal char buffer from the reader. 178 * 179 * @throws IOException 180 * If an I/O error occurs 181 */ 182 private void fillBuffer() throws IOException { 183 if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) { 184 encoderIn.compact(); 185 final int position = ((Buffer)encoderIn).position(); 186 // We don't use Reader#read(CharBuffer) here because it is more efficient 187 // to write directly to the underlying char array (the default implementation 188 // copies data to a temporary char array). 189 final int c = reader.read(encoderIn.array(), position, encoderIn.remaining()); 190 if (c == -1) { 191 endOfInput = true; 192 } else { 193 ((Buffer)encoderIn).position(position+c); 194 } 195 ((Buffer)encoderIn).flip(); 196 } 197 encoderOut.compact(); 198 lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput); 199 ((Buffer)encoderOut).flip(); 200 } 201 202 /** 203 * Read the specified number of bytes into an array. 204 * 205 * @param array the byte array to read into 206 * @param off the offset to start reading bytes into 207 * @param len the number of bytes to read 208 * @return the number of bytes read or <code>-1</code> 209 * if the end of the stream has been reached 210 * @throws IOException if an I/O error occurs 211 */ 212 @Override 213 public int read(final byte[] array, int off, int len) throws IOException { 214 Objects.requireNonNull(array, "array"); 215 if (len < 0 || off < 0 || (off + len) > array.length) { 216 throw new IndexOutOfBoundsException("Array Size=" + array.length + 217 ", offset=" + off + ", length=" + len); 218 } 219 int read = 0; 220 if (len == 0) { 221 return 0; // Always return 0 if len == 0 222 } 223 while (len > 0) { 224 if (encoderOut.hasRemaining()) { 225 final int c = Math.min(encoderOut.remaining(), len); 226 encoderOut.get(array, off, c); 227 off += c; 228 len -= c; 229 read += c; 230 } else { 231 fillBuffer(); 232 if (endOfInput && !encoderOut.hasRemaining()) { 233 break; 234 } 235 } 236 } 237 return read == 0 && endOfInput ? -1 : read; 238 } 239 240 /** 241 * Read the specified number of bytes into an array. 242 * 243 * @param b the byte array to read into 244 * @return the number of bytes read or <code>-1</code> 245 * if the end of the stream has been reached 246 * @throws IOException if an I/O error occurs 247 */ 248 @Override 249 public int read(final byte[] b) throws IOException { 250 return read(b, 0, b.length); 251 } 252 253 /** 254 * Read a single byte. 255 * 256 * @return either the byte read or <code>-1</code> if the end of the stream 257 * has been reached 258 * @throws IOException if an I/O error occurs 259 */ 260 @Override 261 public int read() throws IOException { 262 for (;;) { 263 if (encoderOut.hasRemaining()) { 264 return encoderOut.get() & 0xFF; 265 } 266 fillBuffer(); 267 if (endOfInput && !encoderOut.hasRemaining()) { 268 return -1; 269 } 270 } 271 } 272 273 /** 274 * Close the stream. This method will cause the underlying {@link Reader} 275 * to be closed. 276 * @throws IOException if an I/O error occurs 277 */ 278 @Override 279 public void close() throws IOException { 280 reader.close(); 281 } 282}