/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.juneau.parser;

import static org.apache.juneau.common.utils.IOUtils.*;
import static org.apache.juneau.common.utils.StringUtils.*;
import static org.apache.juneau.internal.ClassUtils.*;

import java.io.*;
import java.nio.charset.*;

import org.apache.juneau.*;
import org.apache.juneau.common.utils.*;

/**
 * A wrapper around an object that a parser reads its input from.
 *
 * <p>
 * For character-based parsers, the input object can be any of the following:
 * <ul>
 * 	<li>{@link Reader}
 * 	<li>{@link CharSequence}
 * 	<li>{@link InputStream}
 * 	<li><code><jk>byte</jk>[]</code>
 * 	<li>{@link File}
 * 	<li><code><jk>null</jk></code>
 * </ul>
 *
 * <p>
 * For stream-based parsers, the input object can be any of the following:
 * <ul>
 * 	<li>{@link InputStream}
 * 	<li><code><jk>byte</jk>[]</code>
 * 	<li>{@link File}
 * 	<li>{@link String} - Encoded bytes, decoded according to the {@link BinaryFormat} passed to the constructor
 * 		(hex-encoded if no format was specified).
 * 	<li><code><jk>null</jk></code>
 * </ul>
 *
 * <p>
 * Note that Readers and InputStreams passed in as input are only closed when {@link #close()} is called if
 * <c>autoCloseStreams</c> was enabled on the constructor, but streams and readers created from
 * {@link File Files} in non-debug mode are always closed.
 *
 * <h5 class='section'>See Also:</h5><ul>
 * 	<li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/SerializersAndParsers">Serializers and Parsers</a>
 * </ul>
 */
public class ParserPipe implements Closeable {

	private final Object input;
	final boolean debug, strict, autoCloseStreams, unbuffered;
	private final Charset charset;
	private final BinaryFormat binaryFormat;

	// Local string copy of the input.  Set for CharSequence input, debug copies, and asString().
	private String inputString;
	private InputStream inputStream;
	private Reader reader;
	private ParserReader parserReader;
	// Whether close() should close the reader / input stream handed out by this pipe.
	private boolean doClose;
	private Positionable positionable;

	/**
	 * Constructor for reader-based parsers.
	 *
	 * @param input The parser input object.
	 * @param debug
	 * 	If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
	 * 	method.
	 * 	This allows the contents of the pipe to be accessed when a problem occurs.
	 * @param strict
	 * 	If <jk>true</jk>, sets {@link CodingErrorAction#REPORT} on {@link CharsetDecoder#onMalformedInput(CodingErrorAction)}
	 * 	and {@link CharsetDecoder#onUnmappableCharacter(CodingErrorAction)}.
	 * 	Otherwise, sets them to {@link CodingErrorAction#REPLACE}.
	 * @param autoCloseStreams
	 * 	Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
	 * @param unbuffered
	 * 	If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
	 * 	multiple times.
	 * 	<br>Otherwise, we read character data into a reusable buffer.
	 * @param streamCharset
	 * 	The charset to expect when reading from {@link InputStream InputStreams}.
	 * 	<br>If <jk>null</jk>, UTF-8 is used for non-file input.
	 * @param fileCharset
	 * 	The charset to expect when reading from {@link File Files}.
	 * 	<br>If <jk>null</jk>, {@link Charset#defaultCharset()} is used for file input.
	 */
	public ParserPipe(Object input, boolean debug, boolean strict, boolean autoCloseStreams, boolean unbuffered, Charset streamCharset, Charset fileCharset) {
		boolean isFile = input instanceof File;
		this.input = input;
		this.debug = debug;
		this.strict = strict;
		this.autoCloseStreams = autoCloseStreams;
		this.unbuffered = unbuffered;
		Charset cs = isFile ? fileCharset : streamCharset;
		if (cs == null)
			cs = (isFile ? Charset.defaultCharset() : UTF8);
		this.charset = cs;
		if (input instanceof CharSequence)
			this.inputString = input.toString();
		this.binaryFormat = null;
	}

	/**
	 * Constructor for stream-based parsers.
	 *
	 * @param input The parser input object.
	 * @param debug
	 * 	If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
	 * 	method.
	 * 	This allows the contents of the pipe to be accessed when a problem occurs.
	 * @param autoCloseStreams
	 * 	Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
	 * @param unbuffered
	 * 	If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
	 * 	multiple times.
	 * 	<br>Otherwise, we read character data into a reusable buffer.
	 * @param binaryFormat The binary format of input strings when converted to bytes.
	 */
	public ParserPipe(Object input, boolean debug, boolean autoCloseStreams, boolean unbuffered, BinaryFormat binaryFormat) {
		this.input = input;
		this.debug = debug;
		this.strict = false;
		this.autoCloseStreams = autoCloseStreams;
		this.unbuffered = unbuffered;
		this.charset = null;
		if (input instanceof CharSequence)
			this.inputString = input.toString();
		this.binaryFormat = binaryFormat;
	}

	/**
	 * Shortcut constructor, typically for straight string input.
	 *
	 * <p>
	 * Equivalent to calling <code><jk>new</jk> ParserPipe(input, <jk>false</jk>, <jk>false</jk>, <jk>false</jk>, <jk>false</jk>, <jk>null</jk>, <jk>null</jk>);</code>
	 *
	 * @param input The input object.
	 */
	public ParserPipe(Object input) {
		this(input, false, false, false, false, null, null);
	}

	/**
	 * Wraps the specified input object inside an input stream.
	 *
	 * <p>
	 * Subclasses can override this method to implement their own input streams.
	 *
	 * <p>
	 * In debug mode, stream and file contents are read fully into memory and a hex representation is made
	 * available through {@link #getInputAsString()}.
	 *
	 * @return The input object wrapped in an input stream, or <jk>null</jk> if the object is null.
	 * @throws IOException If object could not be converted to an input stream.
	 */
	public InputStream getInputStream() throws IOException {
		if (input == null)
			return null;

		if (input instanceof InputStream) {
			if (debug) {
				byte[] b = readBytes((InputStream)input);
				inputString = toHex(b);
				inputStream = new ByteArrayInputStream(b);
			} else {
				inputStream = (InputStream)input;
				doClose = autoCloseStreams;
			}
		} else if (input instanceof byte[]) {
			if (debug)
				inputString = toHex((byte[])input);
			inputStream = new ByteArrayInputStream((byte[])input);
			doClose = false;
		} else if (input instanceof String) {
			inputString = (String)input;
			inputStream = new ByteArrayInputStream(convertFromString((String)input));
			doClose = false;
		} else if (input instanceof File) {
			if (debug) {
				byte[] b = readBytes((File)input);
				inputString = toHex(b);
				inputStream = new ByteArrayInputStream(b);
			} else {
				inputStream = new FileInputStream((File)input);
				doClose = true;
			}
		} else {
			throw new IOException("Cannot convert object of type "+className(input)+" to an InputStream.");
		}

		return inputStream;
	}

	/**
	 * Decodes a string into bytes according to the binary format of this pipe.
	 *
	 * @param in The encoded string.
	 * @return The decoded bytes.
	 */
	private byte[] convertFromString(String in) {
		// Pipes built with the reader-based constructors carry no binary format.
		// Switching on null would throw a NullPointerException, so fall back to hex.
		BinaryFormat bf = (binaryFormat == null ? BinaryFormat.HEX : binaryFormat);
		switch(bf) {
			case BASE64: return base64Decode(in);
			case HEX: return fromHex(in);
			case SPACED_HEX: return fromSpacedHex(in);
			default: return new byte[0];
		}
	}

	/**
	 * Wraps the specified input object inside a reader.
	 *
	 * <p>
	 * Subclasses can override this method to implement their own readers.
	 *
	 * <p>
	 * In debug mode, the contents of readers, streams, byte arrays and files are read fully into memory and made
	 * available through {@link #getInputAsString()}.
	 *
	 * @return The input object wrapped in a Reader, or <jk>null</jk> if the object is null.
	 * @throws IOException If object could not be converted to a reader.
	 */
	public Reader getReader() throws IOException {
		if (input == null)
			return null;

		if (input instanceof Reader) {
			if (debug) {
				inputString = read((Reader)input);
				reader = new StringReader(inputString);
			} else {
				reader = (Reader)input;
				doClose = autoCloseStreams;
			}
		} else if (input instanceof CharSequence) {
			inputString = input.toString();
			reader = new ParserReader(this);
			doClose = false;
		} else if (input instanceof InputStream || input instanceof byte[]) {
			// Only caller-supplied streams are subject to autoCloseStreams; byte arrays need no closing.
			doClose = input instanceof InputStream && autoCloseStreams;
			InputStream is = (
				input instanceof InputStream
				? (InputStream)input
				: new ByteArrayInputStream((byte[])input)
			);
			reader = new InputStreamReader(is, newDecoder());
			if (debug) {
				inputString = read(reader);
				reader = new StringReader(inputString);
			}
		} else if (input instanceof File) {
			// Create the decoder before opening the file so a decoder failure cannot leave an open stream behind.
			CharsetDecoder cd = newDecoder();
			reader = new InputStreamReader(new FileInputStream((File)input), cd);
			if (debug) {
				inputString = read(reader);
				reader = new StringReader(inputString);
			}
			doClose = true;
		} else {
			throw new IOException("Cannot convert object of type "+className(input)+" to a Reader.");
		}

		return reader;
	}

	/**
	 * Creates a decoder for the charset of this pipe, reporting malformed/unmappable input in strict mode and
	 * replacing it otherwise.
	 *
	 * @return A new, configured decoder.
	 */
	private CharsetDecoder newDecoder() {
		CodingErrorAction action = strict ? CodingErrorAction.REPORT : CodingErrorAction.REPLACE;
		CharsetDecoder cd = charset.newDecoder();
		cd.onMalformedInput(action);
		cd.onUnmappableCharacter(action);
		return cd;
	}

	/**
	 * Returns the contents of this pipe as a buffered reader.
	 *
	 * <p>
	 * If the reader passed into this pipe is already a buffered reader, that reader will be returned.
	 *
	 * @return The contents of this pipe as a buffered reader.
	 * @throws IOException Thrown by underlying stream.
	 */
	public Reader getBufferedReader() throws IOException {
		return toBufferedReader(getReader());
	}

	/**
	 * Returns the input to this parser as a plain string.
	 *
	 * <p>
	 * A value is available when the input was a {@link CharSequence}, when the input has been copied locally because
	 * {@link org.apache.juneau.Context.Builder#debug()} is enabled, or after {@link #asString()} has been called.
	 *
	 * @return The input as a string, or <jk>null</jk> if no local copy of the input is held.
	 */
	public String getInputAsString() {
		return inputString;
	}

	/**
	 * Returns the contents of this pipe as a string.
	 *
	 * <p>
	 * If no string copy of the input is held yet, the reader returned by {@link #getReader()} is read and the result
	 * is retained.
	 *
	 * @return The contents of this pipe as a string.
	 * @throws IOException If thrown from inner reader.
	 */
	public String asString() throws IOException {
		if (inputString == null)
			inputString = read(getReader());
		return inputString;
	}

	/**
	 * Converts this pipe into a {@link ParserReader}.
	 *
	 * <p>
	 * If the input is already a {@link ParserReader}, that instance is returned as-is.
	 *
	 * @return The converted pipe, or <jk>null</jk> if the input object is null.
	 * @throws IOException Thrown by underlying stream.
	 */
	public ParserReader getParserReader() throws IOException {
		if (input == null)
			return null;
		if (input instanceof ParserReader)
			parserReader = (ParserReader)input;
		else
			parserReader = new ParserReader(this);
		return parserReader;
	}

	/**
	 * Returns <jk>true</jk> if this pipe holds its input as a string.
	 *
	 * <p>
	 * This is the case when the contents passed into this pipe was a {@link CharSequence}, and also once a string copy
	 * of the input has been made (debug mode, or a call to {@link #asString()}).
	 *
	 * @return <jk>true</jk> if {@link #getInputAsString()} would return a non-null value.
	 */
	public boolean isString() {
		return inputString != null;
	}

	/**
	 * Sets the ParserReader/ParserInputStream/XmlReader constructed from this pipe.
	 *
	 * <p>
	 * Used for gathering the failure position when {@link ParseException} is thrown.
	 *
	 * @param positionable The ParserReader/ParserInputStream/XmlReader constructed from this pipe.
	 */
	public void setPositionable(Positionable positionable) {
		this.positionable = positionable;
	}

	/**
	 * Returns the current position reported by the positionable registered on this pipe.
	 *
	 * @return The current position, or {@link Position#UNKNOWN} if no positionable is set or it reports no position.
	 */
	Position getPosition() {
		if (positionable == null)
			return Position.UNKNOWN;
		Position p = positionable.getPosition();
		if (p == null)
			return Position.UNKNOWN;
		return p;
	}

	/**
	 * Closes the reader and input stream handed out by this pipe, if this pipe is responsible for closing them.
	 *
	 * @throws BeanRuntimeException If the underlying reader or stream could not be closed.
	 */
	@Override /* Closeable */
	public void close() {
		try {
			if (doClose)
				IOUtils.close(reader, inputStream);
		} catch (IOException e) {
			throw new BeanRuntimeException(e);
		}
	}
}