View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.juneau.parser;
18  
19  import static org.apache.juneau.commons.utils.IoUtils.*;
20  import static org.apache.juneau.commons.utils.StringUtils.*;
21  import static org.apache.juneau.commons.utils.ThrowableUtils.*;
22  import static org.apache.juneau.commons.utils.Utils.*;
23  
24  import java.io.*;
25  import java.nio.charset.*;
26  
27  import org.apache.juneau.*;
28  import org.apache.juneau.commons.utils.*;
29  
30  /**
31   * A wrapper around an object that a parser reads its input from.
32   *
33   * <p>
34   * For character-based parsers, the input object can be any of the following:
35   * <ul>
36   * 	<li>{@link Reader}
37   * 	<li>{@link CharSequence}
38   * 	<li>{@link InputStream}
39   * 	<li><code><jk>byte</jk>[]</code>
40   * 	<li>{@link File}
41   * 	<li><code><jk>null</jk></code>
42   * </ul>
43   *
44   * <p>
45   * For stream-based parsers, the input object can be any of the following:
46   * <ul>
47   * 	<li>{@link InputStream}
48   * 	<li><code><jk>byte</jk>[]</code>
49   * 	<li>{@link File}
50   * 	<li>{@link String} - Hex-encoded bytes.  (not BASE-64!)
51   * 	<li><code><jk>null</jk></code>
52   * </ul>
53   *
54   * <p>
 * Note that Readers and InputStreams passed in as input will NOT be closed when {@link #close()} is called unless
 * the <code>autoCloseStreams</code> flag is enabled, but streams and readers created from other types (e.g. Files)
 * WILL be automatically closed.
57   *
58   * <h5 class='section'>See Also:</h5><ul>
59   * 	<li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/SerializersAndParsers">Serializers and Parsers</a>
60   * </ul>
61   */
62  @SuppressWarnings("resource")
63  public class ParserPipe implements Closeable {
64  
	// The raw input object handed to the constructor; may be null.
	private final Object input;
	// Behavior flags fixed at construction time (package-private). 'strict' is always false for the stream-based constructor.
	final boolean debug, strict, autoCloseStreams, unbuffered;
	// Charset used by getReader() to decode InputStream, byte[] and File input; null when built via the stream-based constructor.
	private final Charset charset;

	// String form of the input: set eagerly for CharSequence input, and lazily by asString(), getReader() and getInputStream().
	private String inputString;
	// Stream most recently produced by getInputStream(); closed by close() only when doClose is set.
	private InputStream inputStream;
	// Reader most recently produced by getReader(); closed by close() only when doClose is set.
	private Reader reader;
	// ParserReader most recently produced by getParserReader().
	private ParserReader parserReader;
	// Whether close() should close 'reader' and 'inputStream'.
	private boolean doClose;
	// Decoding applied to String input by getInputStream(); null when built via the reader-based constructor.
	private BinaryFormat binaryFormat;
	// Source of position information for getPosition(); set through setPositionable().
	private Positionable positionable;
76  
	/**
	 * Shortcut constructor, typically for straight string input.
	 *
	 * <p>
	 * Equivalent to calling <code><jk>new</jk> ParserPipe(input, <jk>false</jk>, <jk>false</jk>, <jk>false</jk>, <jk>false</jk>, <jk>null</jk>, <jk>null</jk>);</code>
	 * (the reader-based constructor), i.e. debug, strict, auto-close and unbuffered are all disabled and no
	 * charsets are specified.
	 *
	 * <p>
	 * Because this delegates to the reader-based constructor, no binary format is set on the pipe.
	 *
	 * @param input The input object.
	 */
	public ParserPipe(Object input) {
		this(input, false, false, false, false, null, null);
	}
88  
89  	/**
90  	 * Constructor for stream-based parsers.
91  	 *
92  	 * @param input The parser input object.
93  	 * @param debug
94  	 * 	If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
95  	 * 	method.
96  	 * 	This allows the contents of the pipe to be accessed when a problem occurs.
97  	 * @param autoCloseStreams
98  	 * 	Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
99  	 * @param unbuffered
100 	 * 	If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
101 	 * 	multiple times.
102 	 * 	<br>Otherwise, we read character data into a reusable buffer.
103 	 * @param binaryFormat The binary format of input strings when converted to bytes.
104 	 */
105 	public ParserPipe(Object input, boolean debug, boolean autoCloseStreams, boolean unbuffered, BinaryFormat binaryFormat) {
106 		this.input = input;
107 		this.debug = debug;
108 		this.strict = false;
109 		this.autoCloseStreams = autoCloseStreams;
110 		this.unbuffered = unbuffered;
111 		this.charset = null;
112 		if (input instanceof CharSequence input2)
113 			this.inputString = input2.toString();
114 		this.binaryFormat = binaryFormat;
115 	}
116 
117 	/**
118 	 * Constructor for reader-based parsers.
119 	 *
120 	 * @param input The parser input object.
121 	 * @param debug
122 	 * 	If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
123 	 * 	method.
124 	 * 	This allows the contents of the pipe to be accessed when a problem occurs.
125 	 * @param strict
126 	 * 	If <jk>true</jk>, sets {@link CodingErrorAction#REPORT} on {@link CharsetDecoder#onMalformedInput(CodingErrorAction)}
127 	 * 	and {@link CharsetDecoder#onUnmappableCharacter(CodingErrorAction)}.
128 	 * 	Otherwise, sets them to {@link CodingErrorAction#REPLACE}.
129 	 * @param autoCloseStreams
130 	 * 	Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
131 	 * @param unbuffered
132 	 * 	If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
133 	 * 	multiple times.
134 	 * 	<br>Otherwise, we read character data into a reusable buffer.
135 	 * @param fileCharset
136 	 * 	The charset to expect when reading from {@link File Files}.
137 	 * @param streamCharset
138 	 * 	The charset to expect when reading from {@link InputStream InputStreams}.
139 	 */
140 	public ParserPipe(Object input, boolean debug, boolean strict, boolean autoCloseStreams, boolean unbuffered, Charset streamCharset, Charset fileCharset) {
141 		boolean isFile = input instanceof File;
142 		this.input = input;
143 		this.debug = debug;
144 		this.strict = strict;
145 		this.autoCloseStreams = autoCloseStreams;
146 		this.unbuffered = unbuffered;
147 		Charset cs = isFile ? fileCharset : streamCharset;
148 		if (cs == null)
149 			cs = (isFile ? Charset.defaultCharset() : UTF8);
150 		this.charset = cs;
151 		if (input instanceof CharSequence cs2)
152 			this.inputString = cs2.toString();
153 		this.binaryFormat = null;
154 	}
155 
156 	/**
157 	 * Returns the contents of this pipe as a string.
158 	 *
159 	 * @return The contents of this pipe as a string.
160 	 * @throws IOException If thrown from inner reader.
161 	 */
162 	public String asString() throws IOException {
163 		if (inputString == null)
164 			inputString = read(getReader());
165 		return inputString;
166 	}
167 
168 	@Override /* Overridden from Closeable */
169 	public void close() {
170 		try {
171 			if (doClose)
172 				IoUtils.close(reader, inputStream);
173 		} catch (IOException e) {
174 			throw bex(e);
175 		}
176 	}
177 
178 	/**
179 	 * Returns the contents of this pipe as a buffered reader.
180 	 *
181 	 * <p>
182 	 * If the reader passed into this pipe is already a buffered reader, that reader will be returned.
183 	 *
184 	 * @return The contents of this pipe as a buffered reader.
185 	 * @throws IOException Thrown by underlying stream.
186 	 */
187 	public Reader getBufferedReader() throws IOException { return toBufferedReader(getReader()); }
188 
189 	/**
190 	 * Returns the input to this parser as a plain string.
191 	 *
192 	 * <p>
193 	 * This method only returns a value if {@link org.apache.juneau.Context.Builder#debug()} is enabled.
194 	 *
195 	 * @return The input as a string, or <jk>null</jk> if debug mode not enabled.
196 	 */
197 	public String getInputAsString() { return inputString; }
198 
199 	/**
200 	 * Wraps the specified input object inside an input stream.
201 	 *
202 	 * <p>
203 	 * Subclasses can override this method to implement their own input streams.
204 	 *
205 	 * @return The input object wrapped in an input stream, or <jk>null</jk> if the object is null.
206 	 * @throws IOException If object could not be converted to an input stream.
207 	 */
208 	public InputStream getInputStream() throws IOException {
209 		if (input == null)
210 			return null;
211 
212 		if (input instanceof InputStream input2) {
213 			if (debug) {
214 				var b = readBytes(input2);
215 				inputString = toHex(b);
216 				inputStream = new ByteArrayInputStream(b);
217 			} else {
218 				inputStream = input2;
219 				doClose = autoCloseStreams;
220 			}
221 		} else if (input instanceof byte[]) {
222 			if (debug)
223 				inputString = toHex((byte[])input);
224 			inputStream = new ByteArrayInputStream((byte[])input);
225 			doClose = false;
226 		} else if (input instanceof String input2) {
227 			inputString = input2;
228 			inputStream = new ByteArrayInputStream(convertFromString(input2));
229 			doClose = false;
230 		} else if (input instanceof File input2) {
231 			if (debug) {
232 				var b = readBytes(input2);
233 				inputString = toHex(b);
234 				inputStream = new ByteArrayInputStream(b);
235 			} else {
236 				inputStream = new FileInputStream(input2);
237 				doClose = true;
238 			}
239 		} else {
240 			throw ioex("Cannot convert object of type {0} to an InputStream.", cn(input));
241 		}
242 
243 		return inputStream;
244 	}
245 
246 	/**
247 	 * Converts this pipe into a {@link ParserReader}.
248 	 *
249 	 * @return The converted pipe.
250 	 * @throws IOException Thrown by underlying stream.
251 	 */
252 	public ParserReader getParserReader() throws IOException {
253 		if (input == null)
254 			return null;
255 		if (input instanceof ParserReader input2)
256 			parserReader = input2;
257 		else
258 			parserReader = new ParserReader(this);
259 		return parserReader;
260 	}
261 
262 	/**
263 	 * Wraps the specified input object inside a reader.
264 	 *
265 	 * <p>
266 	 * Subclasses can override this method to implement their own readers.
267 	 *
268 	 * @return The input object wrapped in a Reader, or <jk>null</jk> if the object is null.
269 	 * @throws IOException If object could not be converted to a reader.
270 	 */
271 	public Reader getReader() throws IOException {
272 		if (input == null)
273 			return null;
274 
275 		if (input instanceof Reader input2) {
276 			if (debug) {
277 				inputString = read(input2);
278 				reader = new StringReader(inputString);
279 			} else {
280 				reader = input2;
281 				doClose = autoCloseStreams;
282 			}
283 		} else if (input instanceof CharSequence input2) {
284 			inputString = input2.toString();
285 			reader = new ParserReader(this);
286 			doClose = false;
287 		} else if (input instanceof InputStream || input instanceof byte[]) {
288 			doClose = input instanceof InputStream && autoCloseStreams;
289 			InputStream is = (input instanceof InputStream input2 ? input2 : new ByteArrayInputStream((byte[])input));
290 			CharsetDecoder cd = charset.newDecoder();
291 			if (strict) {
292 				cd.onMalformedInput(CodingErrorAction.REPORT);
293 				cd.onUnmappableCharacter(CodingErrorAction.REPORT);
294 			} else {
295 				cd.onMalformedInput(CodingErrorAction.REPLACE);
296 				cd.onUnmappableCharacter(CodingErrorAction.REPLACE);
297 			}
298 			reader = new InputStreamReader(is, cd);
299 			if (debug) {
300 				inputString = read(reader);
301 				reader = new StringReader(inputString);
302 			}
303 		} else if (input instanceof File input2) {
304 			CharsetDecoder cd = charset.newDecoder();
305 			if (strict) {
306 				cd.onMalformedInput(CodingErrorAction.REPORT);
307 				cd.onUnmappableCharacter(CodingErrorAction.REPORT);
308 			} else {
309 				cd.onMalformedInput(CodingErrorAction.REPLACE);
310 				cd.onUnmappableCharacter(CodingErrorAction.REPLACE);
311 			}
312 			reader = new InputStreamReader(new FileInputStream(input2), cd);
313 			if (debug) {
314 				inputString = read(reader);
315 				reader = new StringReader(inputString);
316 			}
317 			doClose = true;
318 		} else {
319 			throw ioex("Cannot convert object of type {0} to an InputStream.", cn(input));
320 		}
321 
322 		return reader;
323 	}
324 
325 	/**
326 	 * Returns <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}.
327 	 *
328 	 * @return <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}.
329 	 */
330 	public boolean isString() { return nn(inputString); }
331 
332 	/**
333 	 * Sets the ParserReader/ParserInputStream/XmlReader constructed from this pipe.
334 	 *
335 	 * <p>
336 	 * Used for gathering the failure position when {@link ParseException} is thrown.
337 	 *
338 	 * @param positionable The ParserReader/ParserInputStream/XmlReader constructed from this pipe.
339 	 */
340 	public void setPositionable(Positionable positionable) { this.positionable = positionable; }
341 
342 	private byte[] convertFromString(String in) {
343 		return switch (binaryFormat) {
344 			case BASE64 -> base64Decode(in);
345 			case HEX -> fromHex(in);
346 			case SPACED_HEX -> fromSpacedHex(in);
347 			default -> new byte[0];
348 		};
349 	}
350 
351 	Position getPosition() {
352 		if (positionable == null)
353 			return Position.UNKNOWN;
354 		Position p = positionable.getPosition();
355 		if (p == null)
356 			return Position.UNKNOWN;
357 		return p;
358 	}
359 }