001// ***************************************************************************************************************************
002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
003// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
005// * with the License.  You may obtain a copy of the License at                                                              *
006// *                                                                                                                         *
007// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
008// *                                                                                                                         *
009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
011// * specific language governing permissions and limitations under the License.                                              *
012// ***************************************************************************************************************************
013package org.apache.juneau.parser;
014
015import static org.apache.juneau.internal.IOUtils.*;
016import static org.apache.juneau.internal.StringUtils.*;
017
018import java.io.*;
019import java.nio.charset.*;
020
021import org.apache.juneau.*;
022import org.apache.juneau.internal.*;
023
024/**
025 * A wrapper around an object that a parser reads its input from.
026 *
027 * <p>
028 * For character-based parsers, the input object can be any of the following:
029 * <ul>
030 *    <li>{@link Reader}
031 *    <li>{@link CharSequence}
032 *    <li>{@link InputStream}
033 *    <li><code><jk>byte</jk>[]</code>
034 *    <li>{@link File}
035 *    <li><code><jk>null</jk></code>
036 * </ul>
037 *
038 * <p>
039 * For stream-based parsers, the input object can be any of the following:
040 * <ul>
041 *    <li>{@link InputStream}
042 *    <li><code><jk>byte</jk>[]</code>
043 *    <li>{@link File}
044 *    <li>{@link String} - Hex-encoded bytes.  (not BASE-64!)
045 *    <li><code><jk>null</jk></code>
046 * </ul>
047 *
048 * <p>
049 * Note that Readers and InputStreams will NOT be automatically closed when {@link #close()} is called, but
050 * streams and readers created from other types (e.g. Files) WILL be automatically closed.
051 */
052public final class ParserPipe implements Closeable {
053
054   private final Object input;
055   final boolean debug, strict, autoCloseStreams, unbuffered;
056   private final String fileCharset, inputStreamCharset;
057
058   private String inputString;
059   private InputStream inputStream;
060   private Reader reader;
061   private ParserReader parserReader;
062   private boolean doClose;
063   private BinaryFormat binaryFormat;
064   private Positionable positionable;
065
066   /**
067    * Constructor for reader-based parsers.
068    *
069    * @param input The parser input object.
070    * @param debug
071    *    If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
072    *    method.
073    *    This allows the contents of the pipe to be accessed when a problem occurs.
074    * @param strict
075    *    If <jk>true</jk>, sets {@link CodingErrorAction#REPORT} on {@link CharsetDecoder#onMalformedInput(CodingErrorAction)}
076    *    and {@link CharsetDecoder#onUnmappableCharacter(CodingErrorAction)}.
077    *    Otherwise, sets them to {@link CodingErrorAction#REPLACE}.
078    * @param autoCloseStreams
079    *    Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
080    * @param unbuffered
081    *    If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
082    *    multiple times.
083    *    <br>Otherwise, we read character data into a reusable buffer.
084    * @param fileCharset
085    *    The charset to expect when reading from {@link File Files}.
086    *    Use <js>"default"</js> to specify {@link Charset#defaultCharset()}.
087    * @param inputStreamCharset
088    *    The charset to expect when reading from {@link InputStream InputStreams}.
089    *    Use <js>"default"</js> to specify {@link Charset#defaultCharset()}.
090    */
091   public ParserPipe(Object input, boolean debug, boolean strict, boolean autoCloseStreams, boolean unbuffered, String fileCharset, String inputStreamCharset) {
092      this.input = input;
093      this.debug = debug;
094      this.strict = strict;
095      this.autoCloseStreams = autoCloseStreams;
096      this.unbuffered = unbuffered;
097      this.fileCharset = fileCharset;
098      this.inputStreamCharset = inputStreamCharset;
099      if (input instanceof CharSequence)
100         this.inputString = input.toString();
101      this.binaryFormat = null;
102   }
103
104   /**
105    * Constructor for stream-based parsers.
106    *
107    * @param input The parser input object.
108    * @param debug
109    *    If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
110    *    method.
111    *    This allows the contents of the pipe to be accessed when a problem occurs.
112    * @param autoCloseStreams
113    *    Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
114    * @param unbuffered
115    *    If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
116    *    multiple times.
117    *    <br>Otherwise, we read character data into a reusable buffer.
118    * @param binaryFormat The binary format of input strings when converted to bytes.
119    */
120   public ParserPipe(Object input, boolean debug, boolean autoCloseStreams, boolean unbuffered, BinaryFormat binaryFormat) {
121      this.input = input;
122      this.debug = debug;
123      this.strict = false;
124      this.autoCloseStreams = autoCloseStreams;
125      this.unbuffered = unbuffered;
126      this.fileCharset = null;
127      this.inputStreamCharset = null;
128      if (input instanceof CharSequence)
129         this.inputString = input.toString();
130      this.binaryFormat = binaryFormat;
131   }
132
133   /**
134    * Shortcut constructor, typically for straight string input.
135    *
136    * <p>
137    * Equivalent to calling <code><jk>new</jk> ParserPipe(input, <jk>false</jk>, <jk>false</jk>, <jk>null</jk>, <jk>null</jk>);</code>
138    *
139    * @param input The input object.
140    */
141   public ParserPipe(Object input) {
142      this(input, false, false, false, false, null, null);
143   }
144
145   /**
146    * Wraps the specified input object inside an input stream.
147    *
148    * <p>
149    * Subclasses can override this method to implement their own input streams.
150    *
151    * @return The input object wrapped in an input stream, or <jk>null</jk> if the object is null.
152    * @throws IOException If object could not be converted to an input stream.
153    */
154   public InputStream getInputStream() throws IOException {
155      if (input == null)
156         return null;
157
158      if (input instanceof InputStream) {
159         if (debug) {
160            byte[] b = readBytes((InputStream)input, 1024);
161            inputString = toHex(b);
162            inputStream = new ByteArrayInputStream(b);
163         } else {
164            inputStream = (InputStream)input;
165            doClose = autoCloseStreams;
166         }
167      } else if (input instanceof byte[]) {
168         if (debug)
169            inputString = toHex((byte[])input);
170         inputStream = new ByteArrayInputStream((byte[])input);
171         doClose = false;
172      } else if (input instanceof String) {
173         inputString = (String)input;
174         inputStream = new ByteArrayInputStream(convertFromString((String)input));
175         doClose = false;
176      } else if (input instanceof File) {
177         if (debug) {
178            byte[] b = readBytes((File)input);
179            inputString = toHex(b);
180            inputStream = new ByteArrayInputStream(b);
181         } else {
182            inputStream = new FileInputStream((File)input);
183            doClose = true;
184         }
185      } else {
186         throw new IOException("Cannot convert object of type "+input.getClass().getName()+" to an InputStream.");
187      }
188
189      return inputStream;
190   }
191
192   private byte[] convertFromString(String in) {
193      switch(binaryFormat) {
194         case BASE64: return base64Decode(in);
195         case HEX: return fromHex(in);
196         case SPACED_HEX: return fromSpacedHex(in);
197         default: return new byte[0];
198      }
199   }
200
201   /**
202    * Wraps the specified input object inside a reader.
203    *
204    * <p>
205    * Subclasses can override this method to implement their own readers.
206    *
207    * @return The input object wrapped in a Reader, or <jk>null</jk> if the object is null.
208    * @throws IOException If object could not be converted to a reader.
209    */
210   public Reader getReader() throws IOException {
211      if (input == null)
212         return null;
213
214      if (input instanceof Reader) {
215         if (debug) {
216            inputString = read((Reader)input);
217            reader = new StringReader(inputString);
218         } else {
219            reader = (Reader)input;
220            doClose = autoCloseStreams;
221         }
222      } else if (input instanceof CharSequence) {
223         inputString = input.toString();
224         reader = new ParserReader(this);
225         doClose = false;
226      } else if (input instanceof InputStream || input instanceof byte[]) {
227         doClose = input instanceof InputStream && autoCloseStreams;
228         InputStream is = (
229            input instanceof InputStream
230            ? (InputStream)input
231            : new ByteArrayInputStream((byte[])input)
232         );
233         CharsetDecoder cd = (
234            "default".equalsIgnoreCase(inputStreamCharset)
235            ? Charset.defaultCharset()
236            : Charset.forName(inputStreamCharset)
237         ).newDecoder();
238         if (strict) {
239            cd.onMalformedInput(CodingErrorAction.REPORT);
240            cd.onUnmappableCharacter(CodingErrorAction.REPORT);
241         } else {
242            cd.onMalformedInput(CodingErrorAction.REPLACE);
243            cd.onUnmappableCharacter(CodingErrorAction.REPLACE);
244         }
245         reader = new InputStreamReader(is, cd);
246         if (debug) {
247            inputString = read(reader);
248            reader = new StringReader(inputString);
249         }
250      } else if (input instanceof File) {
251         CharsetDecoder cd = (
252            "DEFAULT".equalsIgnoreCase(fileCharset)
253            ? Charset.defaultCharset()
254            : Charset.forName(fileCharset)
255         ).newDecoder();
256         if (strict) {
257            cd.onMalformedInput(CodingErrorAction.REPORT);
258            cd.onUnmappableCharacter(CodingErrorAction.REPORT);
259         } else {
260            cd.onMalformedInput(CodingErrorAction.REPLACE);
261            cd.onUnmappableCharacter(CodingErrorAction.REPLACE);
262         }
263         reader = new InputStreamReader(new FileInputStream((File)input), cd);
264         if (debug) {
265            inputString = read(reader);
266            reader = new StringReader(inputString);
267         }
268         doClose = true;
269      } else {
270         throw new IOException("Cannot convert object of type "+input.getClass().getName()+" to a Reader.");
271      }
272
273      return reader;
274   }
275
276   /**
277    * Returns the contents of this pipe as a buffered reader.
278    *
279    * <p>
280    * If the reader passed into this pipe is already a buffered reader, that reader will be returned.
281    *
282    * @return The contents of this pipe as a buffered reader.
283    * @throws Exception
284    */
285   public Reader getBufferedReader() throws Exception {
286      return IOUtils.getBufferedReader(getReader());
287   }
288
289   /**
290    * Returns the input to this parser as a plain string.
291    *
292    * <p>
293    * This method only returns a value if {@link BeanContext#BEAN_debug} is enabled.
294    *
295    * @return The input as a string, or <jk>null</jk> if debug mode not enabled.
296    */
297   public String getInputAsString() {
298      return inputString;
299   }
300
301   /**
302    * Converts this pipe into a {@link ParserReader}.
303    *
304    * @return The converted pipe.
305    * @throws Exception
306    */
307   public ParserReader getParserReader() throws Exception {
308      if (input == null)
309         return null;
310      if (input instanceof ParserReader)
311         parserReader = (ParserReader)input;
312      else
313         parserReader = new ParserReader(this);
314      return parserReader;
315   }
316
317   /**
318    * Returns <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}.
319    *
320    * @return <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}.
321    */
322   public boolean isString() {
323      return inputString != null;
324   }
325
326   /**
327    * Sets the ParserReader/ParserInputStream/XmlReader constructed from this pipe.
328    *
329    * <p>
330    * Used for gathering the failure position when {@link ParseException} is thrown.
331    *
332    * @param positionable The ParserReader/ParserInputStream/XmlReader constructed from this pipe.
333    */
334   public void setPositionable(Positionable positionable) {
335      this.positionable = positionable;
336   }
337
338   Position getPosition() {
339      if (positionable == null)
340         return Position.UNKNOWN;
341      Position p = positionable.getPosition();
342      if (p == null)
343         return Position.UNKNOWN;
344      return p;
345   }
346
347   @Override /* Closeable */
348   public void close() {
349      try {
350         if (doClose)
351            IOUtils.close(reader, inputStream);
352      } catch (IOException e) {
353         throw new BeanRuntimeException(e);
354      }
355   }
356}