001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.juneau.internal; 018 019import java.io.*; 020import java.nio.*; 021import java.nio.charset.*; 022import java.util.*; 023 024/** 025 * {@link InputStream} implementation that reads a character stream from a {@link Reader} 026 * and transforms it to a byte stream using a specified charset encoding. The stream 027 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset 028 * encodings supported by the JRE are handled correctly. In particular for charsets such as 029 * UTF-16, the implementation ensures that one and only one byte order marker 030 * is produced. 031 * <p> 032 * Since in general it is not possible to predict the number of characters to be read from the 033 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from 034 * the {@link Reader} are buffered. There is therefore no well defined correlation 035 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}. 036 * This also implies that in general there is no need to wrap the underlying {@link Reader} 037 * in a {@link java.io.BufferedReader}. 038 * <p> 039 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader}; 040 * in the following example, reading from {@code in2} would return the same byte 041 * sequence as reading from {@code in} (provided that the initial byte sequence is legal 042 * with respect to the charset encoding): 043 * <pre> 044 * InputStream in = ... 045 * Charset cs = ... 046 * InputStreamReader reader = new InputStreamReader(in, cs); 047 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre> 048 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter}, 049 * except that the control flow is reversed: both classes transform a character stream 050 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream, 051 * while {@link ReaderInputStream} pulls it from the underlying stream. 052 * <p> 053 * Note that while there are use cases where there is no alternative to using 054 * this class, very often the need to use this class is an indication of a flaw 055 * in the design of the code. This class is typically used in situations where an existing 056 * API only accepts an {@link InputStream}, but where the most natural way to produce the data 057 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation 058 * where this problem may appear is when implementing the {@code javax.activation.DataSource} 059 * interface from the Java Activation Framework. 060 * <p> 061 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next 062 * read operation will block or not, it is not possible to provide a meaningful 063 * implementation of the {@link InputStream#available()} method. A call to this method 064 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}. 065 * <p> 066 * 067 * <h5 class='section'>Notes:</h5><ul> 068 * <li class='warn'>This class is not thread safe and is typically discarded after one use. 069 * </ul> 070 * 071 * <h5 class='section'>See Also:</h5><ul> 072 * </ul> 073 */ 074public class ReaderInputStream extends InputStream { 075 private static final int DEFAULT_BUFFER_SIZE = 1024; 076 077 private final Reader reader; 078 private final CharsetEncoder encoder; 079 080 /** 081 * CharBuffer used as input for the decoder. It should be reasonably 082 * large as we read data from the underlying Reader into this buffer. 083 */ 084 private final CharBuffer encoderIn; 085 086 /** 087 * ByteBuffer used as output for the decoder. This buffer can be small 088 * as it is only used to transfer data from the decoder to the 089 * buffer provided by the caller. 090 */ 091 private final ByteBuffer encoderOut; 092 093 private CoderResult lastCoderResult; 094 private boolean endOfInput; 095 096 /** 097 * Construct a new {@link ReaderInputStream}. 098 * 099 * @param reader the target {@link Reader} 100 * @param encoder the charset encoder 101 * @since 2.1 102 */ 103 public ReaderInputStream(final Reader reader, final CharsetEncoder encoder) { 104 this(reader, encoder, DEFAULT_BUFFER_SIZE); 105 } 106 107 /** 108 * Construct a new {@link ReaderInputStream}. 109 * 110 * @param reader the target {@link Reader} 111 * @param encoder the charset encoder 112 * @param bufferSize the size of the input buffer in number of characters 113 */ 114 public ReaderInputStream(final Reader reader, final CharsetEncoder encoder, final int bufferSize) { 115 this.reader = reader; 116 this.encoder = encoder; 117 this.encoderIn = CharBuffer.allocate(bufferSize); 118 this.encoderIn.flip(); // Fixes Java 11 issue. 119 this.encoderOut = ByteBuffer.allocate(128); 120 this.encoderOut.flip(); // Fixes Java 11 issue. 121 } 122 123 /** 124 * Construct a new {@link ReaderInputStream}. 125 * 126 * @param reader the target {@link Reader} 127 * @param charset the charset encoding 128 * @param bufferSize the size of the input buffer in number of characters 129 */ 130 public ReaderInputStream(final Reader reader, final Charset charset, final int bufferSize) { 131 this(reader, 132 charset.newEncoder() 133 .onMalformedInput(CodingErrorAction.REPLACE) 134 .onUnmappableCharacter(CodingErrorAction.REPLACE), 135 bufferSize); 136 } 137 138 /** 139 * Construct a new {@link ReaderInputStream} with a default input buffer size of 140 * <c>1024</c> characters. 141 * 142 * @param reader the target {@link Reader} 143 * @param charset the charset encoding 144 */ 145 public ReaderInputStream(final Reader reader, final Charset charset) { 146 this(reader, charset, DEFAULT_BUFFER_SIZE); 147 } 148 149 /** 150 * Construct a new {@link ReaderInputStream}. 151 * 152 * @param reader the target {@link Reader} 153 * @param charsetName the name of the charset encoding 154 * @param bufferSize the size of the input buffer in number of characters 155 */ 156 public ReaderInputStream(final Reader reader, final String charsetName, final int bufferSize) { 157 this(reader, Charset.forName(charsetName), bufferSize); 158 } 159 160 /** 161 * Construct a new {@link ReaderInputStream} with a default input buffer size of 162 * <c>1024</c> characters. 163 * 164 * @param reader the target {@link Reader} 165 * @param charsetName the name of the charset encoding 166 */ 167 public ReaderInputStream(final Reader reader, final String charsetName) { 168 this(reader, charsetName, DEFAULT_BUFFER_SIZE); 169 } 170 171 /** 172 * Fills the internal char buffer from the reader. 173 * 174 * @throws IOException 175 * If an I/O error occurs 176 */ 177 private void fillBuffer() throws IOException { 178 if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) { 179 encoderIn.compact(); 180 final int position = encoderIn.position(); 181 // We don't use Reader#read(CharBuffer) here because it is more efficient 182 // to write directly to the underlying char array (the default implementation 183 // copies data to a temporary char array). 184 final int c = reader.read(encoderIn.array(), position, encoderIn.remaining()); 185 if (c == -1) { 186 endOfInput = true; 187 } else { 188 encoderIn.position(position+c); 189 } 190 encoderIn.flip(); 191 } 192 encoderOut.compact(); 193 lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput); 194 encoderOut.flip(); 195 } 196 197 /** 198 * Read the specified number of bytes into an array. 199 * 200 * @param array the byte array to read into 201 * @param off the offset to start reading bytes into 202 * @param len the number of bytes to read 203 * @return the number of bytes read or <code>-1</code> 204 * if the end of the stream has been reached 205 * @throws IOException if an I/O error occurs 206 */ 207 @Override 208 public int read(final byte[] array, int off, int len) throws IOException { 209 Objects.requireNonNull(array, "array"); 210 if (len < 0 || off < 0 || (off + len) > array.length) { 211 throw new IndexOutOfBoundsException("Array Size=" + array.length + 212 ", offset=" + off + ", length=" + len); 213 } 214 int read = 0; 215 if (len == 0) { 216 return 0; // Always return 0 if len == 0 217 } 218 while (len > 0) { 219 if (encoderOut.hasRemaining()) { 220 final int c = Math.min(encoderOut.remaining(), len); 221 encoderOut.get(array, off, c); 222 off += c; 223 len -= c; 224 read += c; 225 } else { 226 fillBuffer(); 227 if (endOfInput && !encoderOut.hasRemaining()) { 228 break; 229 } 230 } 231 } 232 return read == 0 && endOfInput ? -1 : read; 233 } 234 235 /** 236 * Read the specified number of bytes into an array. 237 * 238 * @param b the byte array to read into 239 * @return the number of bytes read or <code>-1</code> 240 * if the end of the stream has been reached 241 * @throws IOException if an I/O error occurs 242 */ 243 @Override 244 public int read(final byte[] b) throws IOException { 245 return read(b, 0, b.length); 246 } 247 248 /** 249 * Read a single byte. 250 * 251 * @return either the byte read or <code>-1</code> if the end of the stream 252 * has been reached 253 * @throws IOException if an I/O error occurs 254 */ 255 @Override 256 public int read() throws IOException { 257 for (;;) { 258 if (encoderOut.hasRemaining()) { 259 return encoderOut.get() & 0xFF; 260 } 261 fillBuffer(); 262 if (endOfInput && !encoderOut.hasRemaining()) { 263 return -1; 264 } 265 } 266 } 267 268 /** 269 * Close the stream. This method will cause the underlying {@link Reader} 270 * to be closed. 271 * @throws IOException if an I/O error occurs 272 */ 273 @Override 274 public void close() throws IOException { 275 reader.close(); 276 } 277}