/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.juneau.internal;

import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

/**
 * {@link OutputStream} implementation that transforms a byte stream to a
 * character stream using a specified charset encoding and writes the resulting
 * stream to a {@link Writer}. The stream is transformed using a
 * {@link CharsetDecoder} object, guaranteeing that all charset
 * encodings supported by the JRE are handled correctly.
 * <p>
 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer.
 * This implies that the data is written to the underlying {@link Writer} in chunks
 * that are no larger than the size of this buffer. By default, the buffer is
 * flushed only when it overflows or when {@link #flush()} or {@link #close()}
 * is called. In general there is therefore no need to wrap the underlying {@link Writer}
 * in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can also
 * be instructed to flush the buffer after each write operation. In this case, all
 * available data is written immediately to the underlying {@link Writer}, implying that
 * the current position of the {@link Writer} is correlated to the current position
 * of the {@link WriterOutputStream}.
 * <p>
 * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter};
 * in the following example, writing to {@code out2} would have the same result as writing to
 * {@code out} directly (provided that the byte sequence is legal with respect to the
 * charset encoding):
 * <pre>
 * OutputStream out = ...
 * Charset cs = ...
 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
 * WriterOutputStream out2 = new WriterOutputStream(writer, cs);</pre>
 * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader},
 * except that the control flow is reversed: both classes transform a byte stream
 * into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream,
 * while {@link WriterOutputStream} pushes it to the underlying stream.
 * <p>
 * Note that while there are use cases where there is no alternative to using
 * this class, very often the need to use this class is an indication of a flaw
 * in the design of the code. This class is typically used in situations where an existing
 * API only accepts an {@link OutputStream} object, but where the stream is known to represent
 * character data that must be decoded for further use.
 * <p>
 * Instances of {@link WriterOutputStream} are not thread safe.
/**
 * {@link OutputStream} that decodes the bytes written to it with a {@link CharsetDecoder} and
 * forwards the resulting characters to a {@link Writer}.
 * <p>
 * Decoded characters are collected in a fixed-size buffer and handed to the underlying
 * {@link Writer} in chunks no larger than that buffer. The buffer is emptied when it fills up,
 * when {@link #flush()} or {@link #close()} is called, or after every write operation if the
 * stream was created with {@code writeImmediately == true}.
 * <p>
 * Bytes that form an incomplete multi-byte sequence are retained between write calls, so a
 * character may be split across several {@code write} invocations.
 * <p>
 * Instances of this class are not thread safe.
 *
 * @since 2.0
 */
public class WriterOutputStream extends OutputStream {

    /** Default capacity, in characters, of the decoder output buffer. */
    private static final int BUFFER_SIZE = 1024;

    /** Message used when the sanity check of the JDK's UTF-16 decoder fails. */
    private static final String BROKEN_UTF16_MESSAGE =
        "UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
            + "Please find a JDK that supports UTF-16 if you intend to use UTF-16 with WriterOutputStream";

    /** Destination of the decoded characters. */
    private final Writer writer;

    /** Decoder that turns the incoming bytes into characters. */
    private final CharsetDecoder decoder;

    /** Whether the output buffer is pushed to the writer after every write operation. */
    private final boolean writeImmediately;

    /**
     * ByteBuffer used as input for the decoder. This buffer can be small
     * as it is used only to transfer the received data to the decoder.
     */
    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

    /**
     * CharBuffer used as output for the decoder. It should be somewhat
     * larger as we write from this buffer to the underlying Writer.
     */
    private final CharBuffer decoderOut;

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of 1024
     * characters. The output buffer will only be flushed when it overflows or when
     * {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @param decoder the charset decoder
     * @since 2.1
     */
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
        this(writer, decoder, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer the target {@link Writer}
     * @param decoder the charset decoder
     * @param bufferSize the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws UnsupportedOperationException if the decoder's charset is UTF-16 and the JDK fails
     *                                       the UTF-16 decoding sanity check
     * @since 2.1
     */
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize,
                              final boolean writeImmediately) {
        checkIbmJdkWithBrokenUTF16(decoder.charset());
        this.writer = writer;
        this.decoder = decoder;
        this.writeImmediately = writeImmediately;
        this.decoderOut = CharBuffer.allocate(bufferSize);
    }

    /**
     * Constructs a new {@link WriterOutputStream}. Malformed input and unmappable characters
     * are replaced with {@code "?"}.
     *
     * @param writer the target {@link Writer}
     * @param charset the charset encoding
     * @param bufferSize the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     */
    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize,
                              final boolean writeImmediately) {
        this(writer,
             charset.newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
                    .replaceWith("?"),
             bufferSize,
             writeImmediately);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of 1024
     * characters. The output buffer will only be flushed when it overflows or when
     * {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @param charset the charset encoding
     */
    public WriterOutputStream(final Writer writer, final Charset charset) {
        this(writer, charset, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer the target {@link Writer}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     */
    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize,
                              final boolean writeImmediately) {
        this(writer, Charset.forName(charsetName), bufferSize, writeImmediately);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of 1024
     * characters. The output buffer will only be flushed when it overflows or when
     * {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @param charsetName the name of the charset encoding
     */
    public WriterOutputStream(final Writer writer, final String charsetName) {
        this(writer, charsetName, BUFFER_SIZE, false);
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @param off the start offset in the byte array
     * @param len the number of bytes to write
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void write(final byte[] b, int off, int len) throws IOException {
        // Feed the decoder in slices that fit into the (small) input buffer.
        while (len > 0) {
            final int chunk = Math.min(len, decoderIn.remaining());
            decoderIn.put(b, off, chunk);
            processInput(false);
            off += chunk;
            len -= chunk;
        }
        if (writeImmediately) {
            flushOutput();
        }
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void write(final byte[] b) throws IOException {
        write(b, 0, b.length);
    }

    /**
     * Write a single byte to the stream.
     *
     * @param b the byte to write
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void write(final int b) throws IOException {
        write(new byte[] {(byte) b}, 0, 1);
    }

    /**
     * Flush the stream. Any remaining content accumulated in the output buffer
     * will be written to the underlying {@link Writer}. After that
     * {@link Writer#flush()} will be called.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void flush() throws IOException {
        flushOutput();
        writer.flush();
    }

    /**
     * Close the stream. Any remaining content accumulated in the output buffer
     * will be written to the underlying {@link Writer}. After that
     * {@link Writer#close()} will be called; the writer is closed even when decoding or
     * writing the remaining content fails.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        try {
            processInput(true);
            flushOutput();
        } finally {
            // Release the underlying writer even if the final decode/flush failed.
            writer.close();
        }
    }

    /**
     * Decode the contents of the input ByteBuffer into a CharBuffer.
     *
     * @param endOfInput indicates end of input
     * @throws IOException if an I/O error occurs, if the decoder reports an error result, or if
     *                     the output buffer is too small to receive any decoded character
     */
    private void processInput(final boolean endOfInput) throws IOException {
        // Prepare decoderIn for reading
        decoderIn.flip();
        while (true) {
            final CoderResult coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
            if (coderResult.isUnderflow()) {
                break;
            }
            if (!coderResult.isOverflow()) {
                // Decoders created by this class replace malformed input and unmappable
                // characters, so this is only reachable with a caller-supplied decoder.
                throw new IOException("Unexpected coder result");
            }
            if (decoderOut.position() == 0) {
                // Overflow although the output buffer is empty: flushing cannot free any space,
                // so retrying would spin forever (e.g. buffer size 0, or 1 with a surrogate pair).
                throw new IOException("Output buffer of " + decoderOut.capacity()
                    + " characters is too small to hold the decoded output");
            }
            flushOutput();
        }
        // Discard the bytes that have been read
        decoderIn.compact();
    }

    /**
     * Write the characters accumulated in the output buffer to the underlying {@link Writer}
     * and reset the buffer. Does nothing when the buffer is empty.
     *
     * @throws IOException if an I/O error occurs
     */
    private void flushOutput() throws IOException {
        final int pending = decoderOut.position();
        if (pending > 0) {
            writer.write(decoderOut.array(), 0, pending);
            decoderOut.rewind();
        }
    }

    /**
     * Check if the JDK in use properly supports the given charset. Only UTF-16 is checked: a
     * short test string is encoded and then decoded one byte at a time, and the result must
     * equal the original string.
     *
     * @param charset the charset to check the support for
     * @throws UnsupportedOperationException if the charset is UTF-16 and the check fails
     */
    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
        if (!"UTF-16".equals(charset.name())) {
            return;
        }
        final String testString = "v\u00e9s";
        final byte[] encoded = testString.getBytes(charset);

        final CharsetDecoder probe = charset.newDecoder();
        final ByteBuffer in = ByteBuffer.allocate(16);
        final CharBuffer out = CharBuffer.allocate(testString.length());
        final int last = encoded.length - 1;
        for (int i = 0; i <= last; i++) {
            in.put(encoded[i]);
            in.flip();
            try {
                probe.decode(in, out, i == last);
            } catch (final IllegalArgumentException e) {
                throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE, e);
            }
            in.compact();
        }
        out.rewind();
        if (!testString.equals(out.toString())) {
            throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE);
        }
    }
}