001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.parser;
018
019import static org.apache.juneau.common.utils.IOUtils.*;
020import static org.apache.juneau.common.utils.StringUtils.*;
021import static org.apache.juneau.internal.ClassUtils.*;
022
023import java.io.*;
024import java.nio.charset.*;
025
026import org.apache.juneau.*;
027import org.apache.juneau.common.utils.*;
028
029/**
030 * A wrapper around an object that a parser reads its input from.
031 *
032 * <p>
033 * For character-based parsers, the input object can be any of the following:
034 * <ul>
035 *    <li>{@link Reader}
036 *    <li>{@link CharSequence}
037 *    <li>{@link InputStream}
038 *    <li><code><jk>byte</jk>[]</code>
039 *    <li>{@link File}
040 *    <li><code><jk>null</jk></code>
041 * </ul>
042 *
043 * <p>
044 * For stream-based parsers, the input object can be any of the following:
045 * <ul>
046 *    <li>{@link InputStream}
047 *    <li><code><jk>byte</jk>[]</code>
048 *    <li>{@link File}
049 *    <li>{@link String} - Hex-encoded bytes.  (not BASE-64!)
050 *    <li><code><jk>null</jk></code>
051 * </ul>
052 *
053 * <p>
054 * Note that Readers and InputStreams will NOT be automatically closed when {@link #close()} is called, but
055 * streams and readers created from other types (e.g. Files) WILL be automatically closed.
056 *
057 * <h5 class='section'>See Also:</h5><ul>
058 *    <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/SerializersAndParsers">Serializers and Parsers</a>
059 * </ul>
060 */
061public class ParserPipe implements Closeable {
062
063   private final Object input;
064   final boolean debug, strict, autoCloseStreams, unbuffered;
065   private final Charset charset;
066
067   private String inputString;
068   private InputStream inputStream;
069   private Reader reader;
070   private ParserReader parserReader;
071   private boolean doClose;
072   private BinaryFormat binaryFormat;
073   private Positionable positionable;
074
075   /**
076    * Constructor for reader-based parsers.
077    *
078    * @param input The parser input object.
079    * @param debug
080    *    If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
081    *    method.
082    *    This allows the contents of the pipe to be accessed when a problem occurs.
083    * @param strict
084    *    If <jk>true</jk>, sets {@link CodingErrorAction#REPORT} on {@link CharsetDecoder#onMalformedInput(CodingErrorAction)}
085    *    and {@link CharsetDecoder#onUnmappableCharacter(CodingErrorAction)}.
086    *    Otherwise, sets them to {@link CodingErrorAction#REPLACE}.
087    * @param autoCloseStreams
088    *    Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
089    * @param unbuffered
090    *    If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
091    *    multiple times.
092    *    <br>Otherwise, we read character data into a reusable buffer.
093    * @param fileCharset
094    *    The charset to expect when reading from {@link File Files}.
095    * @param streamCharset
096    *    The charset to expect when reading from {@link InputStream InputStreams}.
097    */
098   public ParserPipe(Object input, boolean debug, boolean strict, boolean autoCloseStreams, boolean unbuffered, Charset streamCharset, Charset fileCharset) {
099      boolean isFile = input instanceof File;
100      this.input = input;
101      this.debug = debug;
102      this.strict = strict;
103      this.autoCloseStreams = autoCloseStreams;
104      this.unbuffered = unbuffered;
105      Charset cs = isFile ? fileCharset : streamCharset;
106      if (cs == null)
107         cs = (isFile ? Charset.defaultCharset() : UTF8);
108      this.charset = cs;
109      if (input instanceof CharSequence)
110         this.inputString = input.toString();
111      this.binaryFormat = null;
112   }
113
114   /**
115    * Constructor for stream-based parsers.
116    *
117    * @param input The parser input object.
118    * @param debug
119    *    If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
120    *    method.
121    *    This allows the contents of the pipe to be accessed when a problem occurs.
122    * @param autoCloseStreams
123    *    Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
124    * @param unbuffered
125    *    If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
126    *    multiple times.
127    *    <br>Otherwise, we read character data into a reusable buffer.
128    * @param binaryFormat The binary format of input strings when converted to bytes.
129    */
130   public ParserPipe(Object input, boolean debug, boolean autoCloseStreams, boolean unbuffered, BinaryFormat binaryFormat) {
131      this.input = input;
132      this.debug = debug;
133      this.strict = false;
134      this.autoCloseStreams = autoCloseStreams;
135      this.unbuffered = unbuffered;
136      this.charset = null;
137      if (input instanceof CharSequence)
138         this.inputString = input.toString();
139      this.binaryFormat = binaryFormat;
140   }
141
142   /**
143    * Shortcut constructor, typically for straight string input.
144    *
145    * <p>
146    * Equivalent to calling <code><jk>new</jk> ParserPipe(input, <jk>false</jk>, <jk>false</jk>, <jk>null</jk>, <jk>null</jk>);</code>
147    *
148    * @param input The input object.
149    */
150   public ParserPipe(Object input) {
151      this(input, false, false, false, false, null, null);
152   }
153
154   /**
155    * Wraps the specified input object inside an input stream.
156    *
157    * <p>
158    * Subclasses can override this method to implement their own input streams.
159    *
160    * @return The input object wrapped in an input stream, or <jk>null</jk> if the object is null.
161    * @throws IOException If object could not be converted to an input stream.
162    */
163   public InputStream getInputStream() throws IOException {
164      if (input == null)
165         return null;
166
167      if (input instanceof InputStream) {
168         if (debug) {
169            byte[] b = readBytes((InputStream)input);
170            inputString = toHex(b);
171            inputStream = new ByteArrayInputStream(b);
172         } else {
173            inputStream = (InputStream)input;
174            doClose = autoCloseStreams;
175         }
176      } else if (input instanceof byte[]) {
177         if (debug)
178            inputString = toHex((byte[])input);
179         inputStream = new ByteArrayInputStream((byte[])input);
180         doClose = false;
181      } else if (input instanceof String) {
182         inputString = (String)input;
183         inputStream = new ByteArrayInputStream(convertFromString((String)input));
184         doClose = false;
185      } else if (input instanceof File) {
186         if (debug) {
187            byte[] b = readBytes((File)input);
188            inputString = toHex(b);
189            inputStream = new ByteArrayInputStream(b);
190         } else {
191            inputStream = new FileInputStream((File)input);
192            doClose = true;
193         }
194      } else {
195         throw new IOException("Cannot convert object of type "+className(input)+" to an InputStream.");
196      }
197
198      return inputStream;
199   }
200
201   private byte[] convertFromString(String in) {
202      switch(binaryFormat) {
203         case BASE64: return base64Decode(in);
204         case HEX: return fromHex(in);
205         case SPACED_HEX: return fromSpacedHex(in);
206         default: return new byte[0];
207      }
208   }
209
210   /**
211    * Wraps the specified input object inside a reader.
212    *
213    * <p>
214    * Subclasses can override this method to implement their own readers.
215    *
216    * @return The input object wrapped in a Reader, or <jk>null</jk> if the object is null.
217    * @throws IOException If object could not be converted to a reader.
218    */
219   public Reader getReader() throws IOException {
220      if (input == null)
221         return null;
222
223      if (input instanceof Reader) {
224         if (debug) {
225            inputString = read((Reader)input);
226            reader = new StringReader(inputString);
227         } else {
228            reader = (Reader)input;
229            doClose = autoCloseStreams;
230         }
231      } else if (input instanceof CharSequence) {
232         inputString = input.toString();
233         reader = new ParserReader(this);
234         doClose = false;
235      } else if (input instanceof InputStream || input instanceof byte[]) {
236         doClose = input instanceof InputStream && autoCloseStreams;
237         InputStream is = (
238            input instanceof InputStream
239            ? (InputStream)input
240            : new ByteArrayInputStream((byte[])input)
241         );
242         CharsetDecoder cd = charset.newDecoder();
243         if (strict) {
244            cd.onMalformedInput(CodingErrorAction.REPORT);
245            cd.onUnmappableCharacter(CodingErrorAction.REPORT);
246         } else {
247            cd.onMalformedInput(CodingErrorAction.REPLACE);
248            cd.onUnmappableCharacter(CodingErrorAction.REPLACE);
249         }
250         reader = new InputStreamReader(is, cd);
251         if (debug) {
252            inputString = read(reader);
253            reader = new StringReader(inputString);
254         }
255      } else if (input instanceof File) {
256         CharsetDecoder cd = charset.newDecoder();
257         if (strict) {
258            cd.onMalformedInput(CodingErrorAction.REPORT);
259            cd.onUnmappableCharacter(CodingErrorAction.REPORT);
260         } else {
261            cd.onMalformedInput(CodingErrorAction.REPLACE);
262            cd.onUnmappableCharacter(CodingErrorAction.REPLACE);
263         }
264         reader = new InputStreamReader(new FileInputStream((File)input), cd);
265         if (debug) {
266            inputString = read(reader);
267            reader = new StringReader(inputString);
268         }
269         doClose = true;
270      } else {
271         throw new IOException("Cannot convert object of type "+className(input)+" to an InputStream.");
272      }
273
274      return reader;
275   }
276
277   /**
278    * Returns the contents of this pipe as a buffered reader.
279    *
280    * <p>
281    * If the reader passed into this pipe is already a buffered reader, that reader will be returned.
282    *
283    * @return The contents of this pipe as a buffered reader.
284    * @throws IOException Thrown by underlying stream.
285    */
286   public Reader getBufferedReader() throws IOException {
287      return toBufferedReader(getReader());
288   }
289
290   /**
291    * Returns the input to this parser as a plain string.
292    *
293    * <p>
294    * This method only returns a value if {@link org.apache.juneau.Context.Builder#debug()} is enabled.
295    *
296    * @return The input as a string, or <jk>null</jk> if debug mode not enabled.
297    */
298   public String getInputAsString() {
299      return inputString;
300   }
301
302   /**
303    * Returns the contents of this pipe as a string.
304    *
305    * @return The contents of this pipe as a string.
306    * @throws IOException If thrown from inner reader.
307    */
308   public String asString() throws IOException {
309      if (inputString == null)
310         inputString = read(getReader());
311      return inputString;
312   }
313
314   /**
315    * Converts this pipe into a {@link ParserReader}.
316    *
317    * @return The converted pipe.
318    * @throws IOException Thrown by underlying stream.
319    */
320   public ParserReader getParserReader() throws IOException {
321      if (input == null)
322         return null;
323      if (input instanceof ParserReader)
324         parserReader = (ParserReader)input;
325      else
326         parserReader = new ParserReader(this);
327      return parserReader;
328   }
329
330   /**
331    * Returns <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}.
332    *
333    * @return <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}.
334    */
335   public boolean isString() {
336      return inputString != null;
337   }
338
339   /**
340    * Sets the ParserReader/ParserInputStream/XmlReader constructed from this pipe.
341    *
342    * <p>
343    * Used for gathering the failure position when {@link ParseException} is thrown.
344    *
345    * @param positionable The ParserReader/ParserInputStream/XmlReader constructed from this pipe.
346    */
347   public void setPositionable(Positionable positionable) {
348      this.positionable = positionable;
349   }
350
351   Position getPosition() {
352      if (positionable == null)
353         return Position.UNKNOWN;
354      Position p = positionable.getPosition();
355      if (p == null)
356         return Position.UNKNOWN;
357      return p;
358   }
359
360   @Override /* Closeable */
361   public void close() {
362      try {
363         if (doClose)
364            IOUtils.close(reader, inputStream);
365      } catch (IOException e) {
366         throw new BeanRuntimeException(e);
367      }
368   }
369}