/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.juneau.commons.io;

import static org.apache.juneau.commons.utils.AssertionUtils.*;

import java.io.*;
import java.nio.*;
import java.nio.charset.*;

/**
 * {@link InputStream} implementation that reads a character stream from a {@link Reader}
 * and transforms it to a byte stream using a specified charset encoding. The stream
 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
 * encodings supported by the JRE are handled correctly. In particular for charsets such as
 * UTF-16, the implementation ensures that one and only one byte order marker
 * is produced.
 * <p>
 * Since in general it is not possible to predict the number of characters to be read from the
 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
 * the {@link Reader} are buffered. There is therefore no well defined correlation
 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
 * This also implies that in general there is no need to wrap the underlying {@link Reader}
 * in a {@link java.io.BufferedReader}.
 * <p>
 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
 * in the following example, reading from {@code in2} would return the same byte
 * sequence as reading from {@code in} (provided that the initial byte sequence is legal
 * with respect to the charset encoding):
 * <pre>
 * InputStream in = ...
 * Charset cs = ...
 * InputStreamReader reader = new InputStreamReader(in, cs);
 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
 * except that the control flow is reversed: both classes transform a character stream
 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
 * while {@link ReaderInputStream} pulls it from the underlying stream.
 * <p>
 * Note that while there are use cases where there is no alternative to using
 * this class, very often the need to use this class is an indication of a flaw
 * in the design of the code. This class is typically used in situations where an existing
 * API only accepts an {@link InputStream}, but where the most natural way to produce the data
 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
 * where this problem may appear is when implementing the {@code javax.activation.DataSource}
 * interface from the Java Activation Framework.
 * <p>
 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
 * read operation will block or not, it is not possible to provide a meaningful
 * implementation of the {@link InputStream#available()} method. A call to this method
 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
 *
 * <h5 class='section'>Notes:</h5><ul>
 * 	<li class='warn'>This class is not thread safe and is typically discarded after one use.
 * </ul>
 *
 */
public class ReaderInputStream extends InputStream {
    private static final int DEFAULT_BUFFER_SIZE = 1024;

    /**
     * Smallest usable capacity of the input buffer. A supplementary character is delivered by the
     * {@link Reader} as two chars (a surrogate pair) and the encoder needs to see both of them at once;
     * with a one-char buffer it would wait for the second half forever.
     */
    private static final int MIN_BUFFER_SIZE = 2;

    private final Reader reader;
    private final CharsetEncoder encoder;

    /**
     * CharBuffer used as input for the encoder. It should be reasonably
     * large as we read data from the underlying Reader into this buffer.
     */
    private final CharBuffer encoderIn;

    /**
     * ByteBuffer used as output for the encoder. This buffer can be small
     * as it is only used to transfer data from the encoder to the
     * buffer provided by the caller.
     */
    private final ByteBuffer encoderOut;

    /** Result of the most recent encode/flush call, or <jk>null</jk> before the first one. */
    private CoderResult lastCoderResult;

    /** Set once the underlying {@link Reader} has reported end of stream. */
    private boolean endOfInput;

    /**
     * Set once the encoder has been flushed completely. From then on the encoder must not be
     * invoked again (it would throw {@link IllegalStateException}) and no more bytes will be produced.
     */
    private boolean encoderFlushed;

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * <c>1024</c> characters.
     *
     * @param reader the target {@link Reader}.  Must not be <jk>null</jk>.
     * @param charset the charset encoding.  Must not be <jk>null</jk>.
     */
    public ReaderInputStream(Reader reader, Charset charset) {
        this(reader, charset, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * <p>
     * Malformed input and unmappable characters are replaced by the charset's replacement bytes.
     *
     * @param reader the target {@link Reader}.  Must not be <jk>null</jk>.
     * @param charset the charset encoding.  Must not be <jk>null</jk>.
     * @param bufferSize the size of the input buffer in number of characters.  Must be positive.
     */
    @SuppressWarnings("resource")
    public ReaderInputStream(Reader reader, Charset charset, int bufferSize) {
        // @formatter:off
        this(assertArgNotNull("reader", reader),
            assertArgNotNull("charset", charset).newEncoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE),
            bufferSize
        );
        // @formatter:on
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * @param reader the target {@link Reader}.  Must not be <jk>null</jk>.
     * @param encoder the charset encoder.  Must not be <jk>null</jk>.
     * @since 2.1
     */
    public ReaderInputStream(Reader reader, CharsetEncoder encoder) {
        this(reader, encoder, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * <p>
     * If the encoder is configured with {@link CodingErrorAction#REPORT}, the read methods throw a
     * {@link CharacterCodingException} when malformed or unmappable input is encountered.
     *
     * @param reader the target {@link Reader}.  Must not be <jk>null</jk>.
     * @param encoder the charset encoder.  Must not be <jk>null</jk>.
     * @param bufferSize
     * 	The size of the input buffer in number of characters.  Must be positive.
     * 	A value of <c>1</c> is rounded up to <c>2</c> so that a surrogate pair always fits.
     */
    public ReaderInputStream(Reader reader, CharsetEncoder encoder, int bufferSize) {
        this.reader = assertArgNotNull("reader", reader);
        this.encoder = assertArgNotNull("encoder", encoder);
        assertArg(bufferSize > 0, "Argument 'bufferSize' must be positive.");
        this.encoderIn = CharBuffer.allocate(Math.max(bufferSize, MIN_BUFFER_SIZE));
        this.encoderIn.flip(); // limit = 0: the buffer starts out empty in read mode.
        this.encoderOut = ByteBuffer.allocate(128);
        this.encoderOut.flip(); // limit = 0: the buffer starts out empty in read mode.
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * <c>1024</c> characters.
     *
     * @param reader the target {@link Reader}.  Must not be <jk>null</jk>.
     * @param charsetName the name of the charset encoding.  Must not be <jk>null</jk>.
     */
    public ReaderInputStream(Reader reader, String charsetName) {
        this(reader, charsetName, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * @param reader the target {@link Reader}.  Must not be <jk>null</jk>.
     * @param charsetName the name of the charset encoding.  Must not be <jk>null</jk>.
     * @param bufferSize the size of the input buffer in number of characters.  Must be positive.
     */
    public ReaderInputStream(Reader reader, String charsetName, int bufferSize) {
        this(reader, Charset.forName(assertArgNotNull("charsetName", charsetName)), bufferSize);
    }

    /**
     * Close the stream. This method will cause the underlying {@link Reader}
     * to be closed.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }

    /**
     * Read a single byte.
     *
     * @return either the byte read or <code>-1</code> if the end of the stream
     * has been reached
     * @throws IOException if an I/O error occurs or the encoder reports a coding error
     */
    @Override
    public int read() throws IOException {
        for (;;) {
            if (encoderOut.hasRemaining()) {
                return encoderOut.get() & 0xFF;
            }
            fillBuffer();
            if (isExhausted()) {
                return -1;
            }
        }
    }

    /**
     * Read the specified number of bytes into an array.
     *
     * @param b the byte array to read into
     * @return the number of bytes read or <code>-1</code>
     *         if the end of the stream has been reached
     * @throws IOException if an I/O error occurs or the encoder reports a coding error
     */
    @Override
    public int read(byte[] b) throws IOException {
        assertArgNotNull("b", b);
        return read(b, 0, b.length);
    }

    /**
     * Read the specified number of bytes into an array.
     *
     * @param array the byte array to read into
     * @param off the offset to start reading bytes into
     * @param len the number of bytes to read
     * @return the number of bytes read or <code>-1</code>
     *         if the end of the stream has been reached
     * @throws IndexOutOfBoundsException if <c>off</c>/<c>len</c> do not describe a range inside <c>array</c>
     * @throws IOException if an I/O error occurs or the encoder reports a coding error
     */
    @Override
    public int read(byte[] array, int off, int len) throws IOException {
        assertArgNotNull("array", array);
        // Written as a subtraction so that a huge off + len cannot overflow and slip past the check.
        if (len < 0 || off < 0 || len > array.length - off) {
            throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
        }
        if (len == 0) {
            return 0; // Always return 0 if len == 0
        }
        int read = 0;
        while (len > 0) {
            if (encoderOut.hasRemaining()) {
                final int c = Math.min(encoderOut.remaining(), len);
                encoderOut.get(array, off, c);
                off += c;
                len -= c;
                read += c;
            } else {
                fillBuffer();
                if (isExhausted()) {
                    break;
                }
            }
        }
        // read can only be 0 here when the loop was left because the stream is exhausted.
        return read == 0 ? -1 : read;
    }

    /**
     * Tells whether every byte this stream will ever produce has already been handed out.
     *
     * @return <jk>true</jk> if the encoder has been flushed and the output buffer is drained.
     */
    private boolean isExhausted() {
        return encoderFlushed && ! encoderOut.hasRemaining();
    }

    /**
     * Refills the output byte buffer: pulls more characters from the reader when the encoder
     * has consumed what it was given, encodes them, and flushes the encoder once the reader is exhausted.
     *
     * @throws CharacterCodingException
     * 	If the encoder reports malformed or unmappable input (only possible with
     * 	{@link CodingErrorAction#REPORT}).
     * @throws IOException
     * 	If an I/O error occurs
     */
    private void fillBuffer() throws IOException {
        if (encoderFlushed) {
            // Nothing more can be produced, and calling encode() after flush() is illegal.
            return;
        }
        if (! endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
            encoderIn.compact();
            try {
                final int position = encoderIn.position();
                // We don't use Reader#read(CharBuffer) here because it is more efficient
                // to write directly to the underlying char array (the default implementation
                // copies data to a temporary char array).
                final int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
                if (c == -1) {
                    endOfInput = true;
                } else {
                    encoderIn.position(position + c);
                }
            } finally {
                // Return to read mode even if the reader failed, so the buffer state stays consistent.
                encoderIn.flip();
            }
        }
        encoderOut.compact();
        try {
            lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
            if (endOfInput && lastCoderResult.isUnderflow()) {
                // All input has been encoded; emit whatever the encoder still holds internally
                // (e.g. the closing shift sequence of a stateful charset). On overflow the
                // flush is retried on the next call, once the caller has drained encoderOut.
                lastCoderResult = encoder.flush(encoderOut);
                encoderFlushed = lastCoderResult.isUnderflow();
            }
        } finally {
            encoderOut.flip();
        }
        if (lastCoderResult.isError()) {
            // Without this the callers would loop forever: the encoder keeps returning the
            // same error without consuming input or producing output.
            lastCoderResult.throwException();
        }
    }
}