001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.internal;
018
019import java.io.IOException;
020import java.io.OutputStream;
021import java.io.Writer;
022import java.nio.ByteBuffer;
023import java.nio.CharBuffer;
024import java.nio.charset.Charset;
025import java.nio.charset.CharsetDecoder;
026import java.nio.charset.CoderResult;
027import java.nio.charset.CodingErrorAction;
028
029/**
030 * {@link OutputStream} implementation that transforms a byte stream to a
031 * character stream using a specified charset encoding and writes the resulting
032 * stream to a {@link Writer}. The stream is transformed using a
033 * {@link CharsetDecoder} object, guaranteeing that all charset
034 * encodings supported by the JRE are handled correctly.
035 * <p>
036 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer.
037 * This implies that the data is written to the underlying {@link Writer} in chunks
038 * that are no larger than the size of this buffer. By default, the buffer is
039 * flushed only when it overflows or when {@link #flush()} or {@link #close()}
040 * is called. In general there is therefore no need to wrap the underlying {@link Writer}
041 * in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can also
042 * be instructed to flush the buffer after each write operation. In this case, all
043 * available data is written immediately to the underlying {@link Writer}, implying that
044 * the current position of the {@link Writer} is correlated to the current position
045 * of the {@link WriterOutputStream}.
046 * <p>
047 * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter};
048 * in the following example, writing to {@code out2} would have the same result as writing to
049 * {@code out} directly (provided that the byte sequence is legal with respect to the
050 * charset encoding):
051 * <pre>
052 * OutputStream out = ...
053 * Charset cs = ...
054 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
055 * WriterOutputStream out2 = new WriterOutputStream(writer, cs);</pre>
056 * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader},
057 * except that the control flow is reversed: both classes transform a byte stream
058 * into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream,
059 * while {@link WriterOutputStream} pushes it to the underlying stream.
060 * <p>
061 * Note that while there are use cases where there is no alternative to using
062 * this class, very often the need to use this class is an indication of a flaw
063 * in the design of the code. This class is typically used in situations where an existing
064 * API only accepts an {@link OutputStream} object, but where the stream is known to represent
065 * character data that must be decoded for further use.
066 * <p>
067 * Instances of {@link WriterOutputStream} are not thread safe.
068 *
069 * @since 2.0
070 */
public class WriterOutputStream extends OutputStream {
    /** Default capacity, in characters, of the decoder output buffer. */
    private static final int BUFFER_SIZE = 1024;

    /** Message reported when the running JDK fails the UTF-16 decoding self-test. */
    private static final String BROKEN_UTF16_MESSAGE =
            "UTF-16 requested when running on an IBM JDK with broken UTF-16 support. " +
            "Please find a JDK that supports UTF-16 if you intend to use UTF-16 with WriterOutputStream";

    /** Target that receives the decoded characters. */
    private final Writer writer;

    /** Decoder that turns the incoming bytes into characters. */
    private final CharsetDecoder decoder;

    /** If {@code true}, the output buffer is written to {@link #writer} after every write call. */
    private final boolean writeImmediately;

    /**
     * ByteBuffer used as input for the decoder. This buffer can be small
     * as it is used only to transfer the received data to the
     * decoder. Outside of {@link #processInput(boolean)} it is always in
     * "fill" mode (ready for {@code put}).
     */
    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

    /**
     * CharBuffer used as output for the decoder. It should be
     * somewhat larger as we write from this buffer to the
     * underlying Writer. Its limit always equals its capacity; only the
     * position moves.
     */
    private final CharBuffer decoderOut;

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of 1024
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
     * is called.
     *
     * @param writer the target {@link Writer}
     * @param decoder the charset decoder
     * @since 2.1
     */
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
        this(writer, decoder, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer the target {@link Writer}
     * @param decoder the charset decoder
     * @param bufferSize the size of the output buffer in number of characters; it must be large
     *                   enough to hold any single decoded character (two {@code char}s for
     *                   supplementary code points), otherwise writing such a character fails
     *                   with an {@link IOException}
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws UnsupportedOperationException if the decoder's charset is UTF-16 and the running
     *                                       JDK fails the UTF-16 decoding self-test
     * @since 2.1
     */
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize,
                              final boolean writeImmediately) {
        checkIbmJdkWithBrokenUTF16(decoder.charset());
        this.writer = writer;
        this.decoder = decoder;
        this.writeImmediately = writeImmediately;
        decoderOut = CharBuffer.allocate(bufferSize);
    }

    /**
     * Constructs a new {@link WriterOutputStream}. Malformed input and unmappable
     * characters are replaced with {@code "?"}.
     *
     * @param writer the target {@link Writer}
     * @param charset the charset encoding
     * @param bufferSize the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     */
    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize,
                              final boolean writeImmediately) {
        this(writer,
             charset.newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
                    .replaceWith("?"),
             bufferSize,
             writeImmediately);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of 1024
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
     * is called.
     *
     * @param writer the target {@link Writer}
     * @param charset the charset encoding
     */
    public WriterOutputStream(final Writer writer, final Charset charset) {
        this(writer, charset, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer the target {@link Writer}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     */
    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize,
                              final boolean writeImmediately) {
        this(writer, Charset.forName(charsetName), bufferSize, writeImmediately);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of 1024
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
     * is called.
     *
     * @param writer the target {@link Writer}
     * @param charsetName the name of the charset encoding
     */
    public WriterOutputStream(final Writer writer, final String charsetName) {
        this(writer, charsetName, BUFFER_SIZE, false);
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @param off the start offset in the byte array
     * @param len the number of bytes to write
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void write(final byte[] b, int off, int len) throws IOException {
        // Feed the decoder in chunks no larger than the free space of the input buffer.
        while (len > 0) {
            final int c = Math.min(len, decoderIn.remaining());
            decoderIn.put(b, off, c);
            processInput(false);
            len -= c;
            off += c;
        }
        if (writeImmediately) {
            flushOutput();
        }
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void write(final byte[] b) throws IOException {
        write(b, 0, b.length);
    }

    /**
     * Write a single byte to the stream.
     *
     * @param b the byte to write
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void write(final int b) throws IOException {
        write(new byte[] { (byte)b }, 0, 1);
    }

    /**
     * Flush the stream. Any remaining content accumulated in the output buffer
     * will be written to the underlying {@link Writer}. After that
     * {@link Writer#flush()} will be called.
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void flush() throws IOException {
        flushOutput();
        writer.flush();
    }

    /**
     * Close the stream. Any remaining content accumulated in the output buffer
     * will be written to the underlying {@link Writer}. After that
     * {@link Writer#close()} will be called. The underlying {@link Writer} is
     * closed even if decoding or writing the remaining content fails.
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        try {
            processInput(true);
            flushOutput();
        } finally {
            // Never leak the target writer, even when the final decode/flush fails.
            writer.close();
        }
    }

    /**
     * Decode the contents of the input ByteBuffer into a CharBuffer, writing the
     * CharBuffer to the underlying {@link Writer} whenever it fills up.
     *
     * @param endOfInput indicates end of input
     * @throws IOException if an I/O error occurs, if the decoder reports an error result,
     *                     or if the output buffer is too small to hold a single decoded character
     */
    private void processInput(final boolean endOfInput) throws IOException {
        // Prepare decoderIn for reading
        decoderIn.flip();
        try {
            while (true) {
                final CoderResult coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
                if (coderResult.isUnderflow()) {
                    break;
                }
                if (!coderResult.isOverflow()) {
                    // The decoder is configured to replace malformed input and unmappable characters,
                    // so we should not get here.
                    throw new IOException("Unexpected coder result");
                }
                if (decoderOut.position() == 0) {
                    // Overflow with an empty buffer: flushing cannot free any space, so retrying
                    // would spin forever (e.g. capacity 0, or capacity 1 with a surrogate pair).
                    throw new IOException("Output buffer of " + decoderOut.capacity()
                            + " characters is too small to hold the next decoded character");
                }
                flushOutput();
            }
        } finally {
            // Discard the bytes that have been read and return to fill mode, also on failure,
            // so that the buffer is never left in read mode for subsequent calls.
            decoderIn.compact();
        }
    }

    /**
     * Write the characters accumulated in the output buffer to the underlying
     * {@link Writer} and empty the buffer. Does not flush the {@link Writer} itself.
     *
     * @throws IOException if an I/O error occurs
     */
    private void flushOutput() throws IOException {
        final int pending = decoderOut.position();
        if (pending > 0) {
            writer.write(decoderOut.array(), 0, pending);
            decoderOut.clear();
        }
    }

    /**
     * Check if the JDK in use properly supports the given charset. Only UTF-16 is
     * checked: a short test string is encoded and then decoded one byte at a time,
     * and the result must equal the original string.
     *
     * @param charset the charset to check the support for
     * @throws UnsupportedOperationException if the charset is UTF-16 and the self-test fails
     */
    private static void checkIbmJdkWithBrokenUTF16(final Charset charset){
        if (!"UTF-16".equals(charset.name())) {
            return;
        }
        final String TEST_STRING_2 = "v\u00e9s";
        final byte[] bytes = TEST_STRING_2.getBytes(charset);

        final CharsetDecoder charsetDecoder2 = charset.newDecoder();
        final ByteBuffer bb2 = ByteBuffer.allocate(16);
        final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
        final int len = bytes.length;
        // Feed the decoder byte by byte, the way write(int) would.
        for (int i = 0; i < len; i++) {
            bb2.put(bytes[i]);
            bb2.flip();
            try {
                charsetDecoder2.decode(bb2, cb2, i == (len - 1));
            } catch (final IllegalArgumentException e) {
                throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE, e);
            }
            bb2.compact();
        }
        cb2.rewind();
        if (!TEST_STRING_2.equals(cb2.toString())) {
            throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE);
        }
    }
}
333
334
335
336
337
338
339
340