001// *************************************************************************************************************************** 002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file * 003// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * 004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance * 005// * with the License. You may obtain a copy of the License at * 006// * * 007// * http://www.apache.org/licenses/LICENSE-2.0 * 008// * * 009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an * 010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * 011// * specific language governing permissions and limitations under the License. * 012// *************************************************************************************************************************** 013package org.apache.juneau.parser; 014 015import static org.apache.juneau.internal.IOUtils.*; 016import static org.apache.juneau.internal.StringUtils.*; 017 018import java.io.*; 019import java.nio.charset.*; 020 021import org.apache.juneau.*; 022import org.apache.juneau.internal.*; 023 024/** 025 * A wrapper around an object that a parser reads its input from. 026 * 027 * <p> 028 * For character-based parsers, the input object can be any of the following: 029 * <ul> 030 * <li>{@link Reader} 031 * <li>{@link CharSequence} 032 * <li>{@link InputStream} 033 * <li><code><jk>byte</jk>[]</code> 034 * <li>{@link File} 035 * <li><code><jk>null</jk></code> 036 * </ul> 037 * 038 * <p> 039 * For stream-based parsers, the input object can be any of the following: 040 * <ul> 041 * <li>{@link InputStream} 042 * <li><code><jk>byte</jk>[]</code> 043 * <li>{@link File} 044 * <li>{@link String} - Hex-encoded bytes. (not BASE-64!) 045 * <li><code><jk>null</jk></code> 046 * </ul> 047 * 048 * <p> 049 * Note that Readers and InputStreams will NOT be automatically closed when {@link #close()} is called, but 050 * streams and readers created from other types (e.g. Files) WILL be automatically closed. 051 */ 052public final class ParserPipe implements Closeable { 053 054 private final Object input; 055 final boolean debug, strict, autoCloseStreams, unbuffered; 056 private final Charset charset; 057 058 private String inputString; 059 private InputStream inputStream; 060 private Reader reader; 061 private ParserReader parserReader; 062 private boolean doClose; 063 private BinaryFormat binaryFormat; 064 private Positionable positionable; 065 066 /** 067 * Constructor for reader-based parsers. 068 * 069 * @param input The parser input object. 070 * @param debug 071 * If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()} 072 * method. 073 * This allows the contents of the pipe to be accessed when a problem occurs. 074 * @param strict 075 * If <jk>true</jk>, sets {@link CodingErrorAction#REPORT} on {@link CharsetDecoder#onMalformedInput(CodingErrorAction)} 076 * and {@link CharsetDecoder#onUnmappableCharacter(CodingErrorAction)}. 077 * Otherwise, sets them to {@link CodingErrorAction#REPLACE}. 078 * @param autoCloseStreams 079 * Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input. 080 * @param unbuffered 081 * If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed 082 * multiple times. 083 * <br>Otherwise, we read character data into a reusable buffer. 084 * @param fileCharset 085 * The charset to expect when reading from {@link File Files}. 086 * @param streamCharset 087 * The charset to expect when reading from {@link InputStream InputStreams}. 088 */ 089 public ParserPipe(Object input, boolean debug, boolean strict, boolean autoCloseStreams, boolean unbuffered, Charset streamCharset, Charset fileCharset) { 090 boolean isFile = input instanceof File; 091 this.input = input; 092 this.debug = debug; 093 this.strict = strict; 094 this.autoCloseStreams = autoCloseStreams; 095 this.unbuffered = unbuffered; 096 Charset cs = isFile ? fileCharset : streamCharset; 097 if (cs == null) 098 cs = (isFile ? Charset.defaultCharset() : UTF8); 099 this.charset = cs; 100 if (input instanceof CharSequence) 101 this.inputString = input.toString(); 102 this.binaryFormat = null; 103 } 104 105 /** 106 * Constructor for stream-based parsers. 107 * 108 * @param input The parser input object. 109 * @param debug 110 * If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()} 111 * method. 112 * This allows the contents of the pipe to be accessed when a problem occurs. 113 * @param autoCloseStreams 114 * Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input. 115 * @param unbuffered 116 * If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed 117 * multiple times. 118 * <br>Otherwise, we read character data into a reusable buffer. 119 * @param binaryFormat The binary format of input strings when converted to bytes. 120 */ 121 public ParserPipe(Object input, boolean debug, boolean autoCloseStreams, boolean unbuffered, BinaryFormat binaryFormat) { 122 this.input = input; 123 this.debug = debug; 124 this.strict = false; 125 this.autoCloseStreams = autoCloseStreams; 126 this.unbuffered = unbuffered; 127 this.charset = null; 128 if (input instanceof CharSequence) 129 this.inputString = input.toString(); 130 this.binaryFormat = binaryFormat; 131 } 132 133 /** 134 * Shortcut constructor, typically for straight string input. 135 * 136 * <p> 137 * Equivalent to calling <code><jk>new</jk> ParserPipe(input, <jk>false</jk>, <jk>false</jk>, <jk>null</jk>, <jk>null</jk>);</code> 138 * 139 * @param input The input object. 140 */ 141 public ParserPipe(Object input) { 142 this(input, false, false, false, false, null, null); 143 } 144 145 /** 146 * Wraps the specified input object inside an input stream. 147 * 148 * <p> 149 * Subclasses can override this method to implement their own input streams. 150 * 151 * @return The input object wrapped in an input stream, or <jk>null</jk> if the object is null. 152 * @throws IOException If object could not be converted to an input stream. 153 */ 154 public InputStream getInputStream() throws IOException { 155 if (input == null) 156 return null; 157 158 if (input instanceof InputStream) { 159 if (debug) { 160 byte[] b = readBytes((InputStream)input, 1024); 161 inputString = toHex(b); 162 inputStream = new ByteArrayInputStream(b); 163 } else { 164 inputStream = (InputStream)input; 165 doClose = autoCloseStreams; 166 } 167 } else if (input instanceof byte[]) { 168 if (debug) 169 inputString = toHex((byte[])input); 170 inputStream = new ByteArrayInputStream((byte[])input); 171 doClose = false; 172 } else if (input instanceof String) { 173 inputString = (String)input; 174 inputStream = new ByteArrayInputStream(convertFromString((String)input)); 175 doClose = false; 176 } else if (input instanceof File) { 177 if (debug) { 178 byte[] b = readBytes((File)input); 179 inputString = toHex(b); 180 inputStream = new ByteArrayInputStream(b); 181 } else { 182 inputStream = new FileInputStream((File)input); 183 doClose = true; 184 } 185 } else { 186 throw new IOException("Cannot convert object of type "+input.getClass().getName()+" to an InputStream."); 187 } 188 189 return inputStream; 190 } 191 192 private byte[] convertFromString(String in) { 193 switch(binaryFormat) { 194 case BASE64: return base64Decode(in); 195 case HEX: return fromHex(in); 196 case SPACED_HEX: return fromSpacedHex(in); 197 default: return new byte[0]; 198 } 199 } 200 201 /** 202 * Wraps the specified input object inside a reader. 203 * 204 * <p> 205 * Subclasses can override this method to implement their own readers. 206 * 207 * @return The input object wrapped in a Reader, or <jk>null</jk> if the object is null. 208 * @throws IOException If object could not be converted to a reader. 209 */ 210 public Reader getReader() throws IOException { 211 if (input == null) 212 return null; 213 214 if (input instanceof Reader) { 215 if (debug) { 216 inputString = read((Reader)input); 217 reader = new StringReader(inputString); 218 } else { 219 reader = (Reader)input; 220 doClose = autoCloseStreams; 221 } 222 } else if (input instanceof CharSequence) { 223 inputString = input.toString(); 224 reader = new ParserReader(this); 225 doClose = false; 226 } else if (input instanceof InputStream || input instanceof byte[]) { 227 doClose = input instanceof InputStream && autoCloseStreams; 228 InputStream is = ( 229 input instanceof InputStream 230 ? (InputStream)input 231 : new ByteArrayInputStream((byte[])input) 232 ); 233 CharsetDecoder cd = charset.newDecoder(); 234 if (strict) { 235 cd.onMalformedInput(CodingErrorAction.REPORT); 236 cd.onUnmappableCharacter(CodingErrorAction.REPORT); 237 } else { 238 cd.onMalformedInput(CodingErrorAction.REPLACE); 239 cd.onUnmappableCharacter(CodingErrorAction.REPLACE); 240 } 241 reader = new InputStreamReader(is, cd); 242 if (debug) { 243 inputString = read(reader); 244 reader = new StringReader(inputString); 245 } 246 } else if (input instanceof File) { 247 CharsetDecoder cd = charset.newDecoder(); 248 if (strict) { 249 cd.onMalformedInput(CodingErrorAction.REPORT); 250 cd.onUnmappableCharacter(CodingErrorAction.REPORT); 251 } else { 252 cd.onMalformedInput(CodingErrorAction.REPLACE); 253 cd.onUnmappableCharacter(CodingErrorAction.REPLACE); 254 } 255 reader = new InputStreamReader(new FileInputStream((File)input), cd); 256 if (debug) { 257 inputString = read(reader); 258 reader = new StringReader(inputString); 259 } 260 doClose = true; 261 } else { 262 throw new IOException("Cannot convert object of type "+input.getClass().getName()+" to a Reader."); 263 } 264 265 return reader; 266 } 267 268 /** 269 * Returns the contents of this pipe as a buffered reader. 270 * 271 * <p> 272 * If the reader passed into this pipe is already a buffered reader, that reader will be returned. 273 * 274 * @return The contents of this pipe as a buffered reader. 275 * @throws IOException Thrown by underlying stream. 276 */ 277 public Reader getBufferedReader() throws IOException { 278 return IOUtils.getBufferedReader(getReader()); 279 } 280 281 /** 282 * Returns the input to this parser as a plain string. 283 * 284 * <p> 285 * This method only returns a value if {@link Context#CONTEXT_debug} is enabled. 286 * 287 * @return The input as a string, or <jk>null</jk> if debug mode not enabled. 288 */ 289 public String getInputAsString() { 290 return inputString; 291 } 292 293 /** 294 * Returns the contents of this pipe as a string. 295 * 296 * @return The contents of this pipe as a string. 297 * @throws IOException If thrown from inner reader. 298 */ 299 public String asString() throws IOException { 300 if (inputString == null) 301 inputString = IOUtils.read(getReader()); 302 return inputString; 303 } 304 305 /** 306 * Converts this pipe into a {@link ParserReader}. 307 * 308 * @return The converted pipe. 309 * @throws IOException Thrown by underlying stream. 310 */ 311 public ParserReader getParserReader() throws IOException { 312 if (input == null) 313 return null; 314 if (input instanceof ParserReader) 315 parserReader = (ParserReader)input; 316 else 317 parserReader = new ParserReader(this); 318 return parserReader; 319 } 320 321 /** 322 * Returns <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}. 323 * 324 * @return <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}. 325 */ 326 public boolean isString() { 327 return inputString != null; 328 } 329 330 /** 331 * Sets the ParserReader/ParserInputStream/XmlReader constructed from this pipe. 332 * 333 * <p> 334 * Used for gathering the failure position when {@link ParseException} is thrown. 335 * 336 * @param positionable The ParserReader/ParserInputStream/XmlReader constructed from this pipe. 337 */ 338 public void setPositionable(Positionable positionable) { 339 this.positionable = positionable; 340 } 341 342 Position getPosition() { 343 if (positionable == null) 344 return Position.UNKNOWN; 345 Position p = positionable.getPosition(); 346 if (p == null) 347 return Position.UNKNOWN; 348 return p; 349 } 350 351 @Override /* Closeable */ 352 public void close() { 353 try { 354 if (doClose) 355 IOUtils.close(reader, inputStream); 356 } catch (IOException e) { 357 throw new BeanRuntimeException(e); 358 } 359 } 360}