001// ***************************************************************************************************************************
002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
003// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
005// * with the License.  You may obtain a copy of the License at                                                              *
006// *                                                                                                                         *
007// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
008// *                                                                                                                         *
009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
011// * specific language governing permissions and limitations under the License.                                              *
012// ***************************************************************************************************************************
013package org.apache.juneau.parser;
014
015import static org.apache.juneau.internal.IOUtils.*;
016import static org.apache.juneau.internal.StringUtils.*;
017
018import java.io.*;
019import java.nio.charset.*;
020
021import org.apache.juneau.*;
022import org.apache.juneau.internal.*;
023
024/**
025 * A wrapper around an object that a parser reads its input from.
026 *
027 * <p>
028 * For character-based parsers, the input object can be any of the following:
029 * <ul>
030 *    <li>{@link Reader}
031 *    <li>{@link CharSequence}
032 *    <li>{@link InputStream}
033 *    <li><code><jk>byte</jk>[]</code>
034 *    <li>{@link File}
035 *    <li><code><jk>null</jk></code>
036 * </ul>
037 *
038 * <p>
039 * For stream-based parsers, the input object can be any of the following:
040 * <ul>
041 *    <li>{@link InputStream}
042 *    <li><code><jk>byte</jk>[]</code>
043 *    <li>{@link File}
044 *    <li>{@link String} - Hex-encoded bytes.  (not BASE-64!)
045 *    <li><code><jk>null</jk></code>
046 * </ul>
047 *
048 * <p>
049 * Note that Readers and InputStreams will NOT be automatically closed when {@link #close()} is called, but
050 * streams and readers created from other types (e.g. Files) WILL be automatically closed.
051 */
052public final class ParserPipe implements Closeable {
053
054   private final Object input;
055   final boolean debug, strict, autoCloseStreams, unbuffered;
056   private final Charset charset;
057
058   private String inputString;
059   private InputStream inputStream;
060   private Reader reader;
061   private ParserReader parserReader;
062   private boolean doClose;
063   private BinaryFormat binaryFormat;
064   private Positionable positionable;
065
066   /**
067    * Constructor for reader-based parsers.
068    *
069    * @param input The parser input object.
070    * @param debug
071    *    If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
072    *    method.
073    *    This allows the contents of the pipe to be accessed when a problem occurs.
074    * @param strict
075    *    If <jk>true</jk>, sets {@link CodingErrorAction#REPORT} on {@link CharsetDecoder#onMalformedInput(CodingErrorAction)}
076    *    and {@link CharsetDecoder#onUnmappableCharacter(CodingErrorAction)}.
077    *    Otherwise, sets them to {@link CodingErrorAction#REPLACE}.
078    * @param autoCloseStreams
079    *    Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
080    * @param unbuffered
081    *    If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
082    *    multiple times.
083    *    <br>Otherwise, we read character data into a reusable buffer.
084    * @param fileCharset
085    *    The charset to expect when reading from {@link File Files}.
086    * @param streamCharset
087    *    The charset to expect when reading from {@link InputStream InputStreams}.
088    */
089   public ParserPipe(Object input, boolean debug, boolean strict, boolean autoCloseStreams, boolean unbuffered, Charset streamCharset, Charset fileCharset) {
090      boolean isFile = input instanceof File;
091      this.input = input;
092      this.debug = debug;
093      this.strict = strict;
094      this.autoCloseStreams = autoCloseStreams;
095      this.unbuffered = unbuffered;
096      Charset cs = isFile ? fileCharset : streamCharset;
097      if (cs == null)
098         cs = (isFile ? Charset.defaultCharset() : UTF8);
099      this.charset = cs;
100      if (input instanceof CharSequence)
101         this.inputString = input.toString();
102      this.binaryFormat = null;
103   }
104
105   /**
106    * Constructor for stream-based parsers.
107    *
108    * @param input The parser input object.
109    * @param debug
110    *    If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
111    *    method.
112    *    This allows the contents of the pipe to be accessed when a problem occurs.
113    * @param autoCloseStreams
114    *    Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
115    * @param unbuffered
116    *    If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
117    *    multiple times.
118    *    <br>Otherwise, we read character data into a reusable buffer.
119    * @param binaryFormat The binary format of input strings when converted to bytes.
120    */
121   public ParserPipe(Object input, boolean debug, boolean autoCloseStreams, boolean unbuffered, BinaryFormat binaryFormat) {
122      this.input = input;
123      this.debug = debug;
124      this.strict = false;
125      this.autoCloseStreams = autoCloseStreams;
126      this.unbuffered = unbuffered;
127      this.charset = null;
128      if (input instanceof CharSequence)
129         this.inputString = input.toString();
130      this.binaryFormat = binaryFormat;
131   }
132
133   /**
134    * Shortcut constructor, typically for straight string input.
135    *
136    * <p>
137    * Equivalent to calling <code><jk>new</jk> ParserPipe(input, <jk>false</jk>, <jk>false</jk>, <jk>null</jk>, <jk>null</jk>);</code>
138    *
139    * @param input The input object.
140    */
141   public ParserPipe(Object input) {
142      this(input, false, false, false, false, null, null);
143   }
144
145   /**
146    * Wraps the specified input object inside an input stream.
147    *
148    * <p>
149    * Subclasses can override this method to implement their own input streams.
150    *
151    * @return The input object wrapped in an input stream, or <jk>null</jk> if the object is null.
152    * @throws IOException If object could not be converted to an input stream.
153    */
154   public InputStream getInputStream() throws IOException {
155      if (input == null)
156         return null;
157
158      if (input instanceof InputStream) {
159         if (debug) {
160            byte[] b = readBytes((InputStream)input, 1024);
161            inputString = toHex(b);
162            inputStream = new ByteArrayInputStream(b);
163         } else {
164            inputStream = (InputStream)input;
165            doClose = autoCloseStreams;
166         }
167      } else if (input instanceof byte[]) {
168         if (debug)
169            inputString = toHex((byte[])input);
170         inputStream = new ByteArrayInputStream((byte[])input);
171         doClose = false;
172      } else if (input instanceof String) {
173         inputString = (String)input;
174         inputStream = new ByteArrayInputStream(convertFromString((String)input));
175         doClose = false;
176      } else if (input instanceof File) {
177         if (debug) {
178            byte[] b = readBytes((File)input);
179            inputString = toHex(b);
180            inputStream = new ByteArrayInputStream(b);
181         } else {
182            inputStream = new FileInputStream((File)input);
183            doClose = true;
184         }
185      } else {
186         throw new IOException("Cannot convert object of type "+input.getClass().getName()+" to an InputStream.");
187      }
188
189      return inputStream;
190   }
191
192   private byte[] convertFromString(String in) {
193      switch(binaryFormat) {
194         case BASE64: return base64Decode(in);
195         case HEX: return fromHex(in);
196         case SPACED_HEX: return fromSpacedHex(in);
197         default: return new byte[0];
198      }
199   }
200
201   /**
202    * Wraps the specified input object inside a reader.
203    *
204    * <p>
205    * Subclasses can override this method to implement their own readers.
206    *
207    * @return The input object wrapped in a Reader, or <jk>null</jk> if the object is null.
208    * @throws IOException If object could not be converted to a reader.
209    */
210   public Reader getReader() throws IOException {
211      if (input == null)
212         return null;
213
214      if (input instanceof Reader) {
215         if (debug) {
216            inputString = read((Reader)input);
217            reader = new StringReader(inputString);
218         } else {
219            reader = (Reader)input;
220            doClose = autoCloseStreams;
221         }
222      } else if (input instanceof CharSequence) {
223         inputString = input.toString();
224         reader = new ParserReader(this);
225         doClose = false;
226      } else if (input instanceof InputStream || input instanceof byte[]) {
227         doClose = input instanceof InputStream && autoCloseStreams;
228         InputStream is = (
229            input instanceof InputStream
230            ? (InputStream)input
231            : new ByteArrayInputStream((byte[])input)
232         );
233         CharsetDecoder cd = charset.newDecoder();
234         if (strict) {
235            cd.onMalformedInput(CodingErrorAction.REPORT);
236            cd.onUnmappableCharacter(CodingErrorAction.REPORT);
237         } else {
238            cd.onMalformedInput(CodingErrorAction.REPLACE);
239            cd.onUnmappableCharacter(CodingErrorAction.REPLACE);
240         }
241         reader = new InputStreamReader(is, cd);
242         if (debug) {
243            inputString = read(reader);
244            reader = new StringReader(inputString);
245         }
246      } else if (input instanceof File) {
247         CharsetDecoder cd = charset.newDecoder();
248         if (strict) {
249            cd.onMalformedInput(CodingErrorAction.REPORT);
250            cd.onUnmappableCharacter(CodingErrorAction.REPORT);
251         } else {
252            cd.onMalformedInput(CodingErrorAction.REPLACE);
253            cd.onUnmappableCharacter(CodingErrorAction.REPLACE);
254         }
255         reader = new InputStreamReader(new FileInputStream((File)input), cd);
256         if (debug) {
257            inputString = read(reader);
258            reader = new StringReader(inputString);
259         }
260         doClose = true;
261      } else {
262         throw new IOException("Cannot convert object of type "+input.getClass().getName()+" to a Reader.");
263      }
264
265      return reader;
266   }
267
268   /**
269    * Returns the contents of this pipe as a buffered reader.
270    *
271    * <p>
272    * If the reader passed into this pipe is already a buffered reader, that reader will be returned.
273    *
274    * @return The contents of this pipe as a buffered reader.
275    * @throws IOException Thrown by underlying stream.
276    */
277   public Reader getBufferedReader() throws IOException {
278      return IOUtils.getBufferedReader(getReader());
279   }
280
281   /**
282    * Returns the input to this parser as a plain string.
283    *
284    * <p>
285    * This method only returns a value if {@link BeanContext#BEAN_debug} is enabled.
286    *
287    * @return The input as a string, or <jk>null</jk> if debug mode not enabled.
288    */
289   public String getInputAsString() {
290      return inputString;
291   }
292
293   /**
294    * Converts this pipe into a {@link ParserReader}.
295    *
296    * @return The converted pipe.
297    * @throws IOException Thrown by underlying stream.
298    */
299   public ParserReader getParserReader() throws IOException {
300      if (input == null)
301         return null;
302      if (input instanceof ParserReader)
303         parserReader = (ParserReader)input;
304      else
305         parserReader = new ParserReader(this);
306      return parserReader;
307   }
308
309   /**
310    * Returns <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}.
311    *
312    * @return <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}.
313    */
314   public boolean isString() {
315      return inputString != null;
316   }
317
318   /**
319    * Sets the ParserReader/ParserInputStream/XmlReader constructed from this pipe.
320    *
321    * <p>
322    * Used for gathering the failure position when {@link ParseException} is thrown.
323    *
324    * @param positionable The ParserReader/ParserInputStream/XmlReader constructed from this pipe.
325    */
326   public void setPositionable(Positionable positionable) {
327      this.positionable = positionable;
328   }
329
330   Position getPosition() {
331      if (positionable == null)
332         return Position.UNKNOWN;
333      Position p = positionable.getPosition();
334      if (p == null)
335         return Position.UNKNOWN;
336      return p;
337   }
338
339   @Override /* Closeable */
340   public void close() {
341      try {
342         if (doClose)
343            IOUtils.close(reader, inputStream);
344      } catch (IOException e) {
345         throw new BeanRuntimeException(e);
346      }
347   }
348}