// ***************************************************************************************************************************
// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
// * with the License.  You may obtain a copy of the License at                                                              *
// *                                                                                                                         *
// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
// *                                                                                                                         *
// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
// * specific language governing permissions and limitations under the License.                                              *
// ***************************************************************************************************************************
package org.apache.juneau.parser;

import static org.apache.juneau.internal.IOUtils.*;
import static org.apache.juneau.internal.StringUtils.*;

import java.io.*;
import java.nio.charset.*;

import org.apache.juneau.*;
import org.apache.juneau.internal.*;

/**
 * A wrapper around an object that a parser reads its input from.
 *
 * <p>
 * For character-based parsers, the input object can be any of the following:
 * <ul>
 * 	<li>{@link Reader}
 * 	<li>{@link CharSequence}
 * 	<li>{@link InputStream}
 * 	<li><code><jk>byte</jk>[]</code>
 * 	<li>{@link File}
 * 	<li><code><jk>null</jk></code>
 * </ul>
 *
 * <p>
 * For stream-based parsers, the input object can be any of the following:
 * <ul>
 * 	<li>{@link InputStream}
 * 	<li><code><jk>byte</jk>[]</code>
 * 	<li>{@link File}
 * 	<li>{@link String} - Hex-encoded bytes.  (not BASE-64!)
 * 	<li><code><jk>null</jk></code>
 * </ul>
 *
 * <p>
 * Note that Readers and InputStreams passed in as input are closed by {@link #close()} only if the
 * <code>autoCloseStreams</code> flag was enabled, whereas streams and readers created by this class from other types
 * (e.g. Files) WILL be automatically closed.
 */
public final class ParserPipe implements Closeable {

	private final Object input;
	final boolean debug, strict, autoCloseStreams, unbuffered;
	private final String fileCharset, inputStreamCharset;
	private final BinaryFormat binaryFormat;

	private String inputString;
	private InputStream inputStream;
	private Reader reader;
	private ParserReader parserReader;
	private boolean doClose;
	private Positionable positionable;

	/**
	 * Constructor for reader-based parsers.
	 *
	 * @param input The parser input object.
	 * @param debug
	 * 	If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
	 * 	method.
	 * 	This allows the contents of the pipe to be accessed when a problem occurs.
	 * @param strict
	 * 	If <jk>true</jk>, sets {@link CodingErrorAction#REPORT} on {@link CharsetDecoder#onMalformedInput(CodingErrorAction)}
	 * 	and {@link CharsetDecoder#onUnmappableCharacter(CodingErrorAction)}.
	 * 	Otherwise, sets them to {@link CodingErrorAction#REPLACE}.
	 * @param autoCloseStreams
	 * 	Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
	 * @param unbuffered
	 * 	If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
	 * 	multiple times.
	 * 	<br>Otherwise, we read character data into a reusable buffer.
	 * @param fileCharset
	 * 	The charset to expect when reading from {@link File Files}.
	 * 	Use <js>"default"</js> (or <jk>null</jk>) to specify {@link Charset#defaultCharset()}.
	 * @param inputStreamCharset
	 * 	The charset to expect when reading from {@link InputStream InputStreams}.
	 * 	Use <js>"default"</js> (or <jk>null</jk>) to specify {@link Charset#defaultCharset()}.
	 */
	public ParserPipe(Object input, boolean debug, boolean strict, boolean autoCloseStreams, boolean unbuffered, String fileCharset, String inputStreamCharset) {
		this.input = input;
		this.debug = debug;
		this.strict = strict;
		this.autoCloseStreams = autoCloseStreams;
		this.unbuffered = unbuffered;
		this.fileCharset = fileCharset;
		this.inputStreamCharset = inputStreamCharset;
		if (input instanceof CharSequence)
			this.inputString = input.toString();
		this.binaryFormat = null;
	}

	/**
	 * Constructor for stream-based parsers.
	 *
	 * @param input The parser input object.
	 * @param debug
	 * 	If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
	 * 	method.
	 * 	This allows the contents of the pipe to be accessed when a problem occurs.
	 * @param autoCloseStreams
	 * 	Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
	 * @param unbuffered
	 * 	If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
	 * 	multiple times.
	 * 	<br>Otherwise, we read character data into a reusable buffer.
	 * @param binaryFormat
	 * 	The binary format of input strings when converted to bytes.
	 * 	If <jk>null</jk>, strings are treated as hex-encoded.
	 */
	public ParserPipe(Object input, boolean debug, boolean autoCloseStreams, boolean unbuffered, BinaryFormat binaryFormat) {
		this.input = input;
		this.debug = debug;
		this.strict = false;
		this.autoCloseStreams = autoCloseStreams;
		this.unbuffered = unbuffered;
		this.fileCharset = null;
		this.inputStreamCharset = null;
		if (input instanceof CharSequence)
			this.inputString = input.toString();
		this.binaryFormat = binaryFormat;
	}

	/**
	 * Shortcut constructor, typically for straight string input.
	 *
	 * <p>
	 * Equivalent to calling <code><jk>new</jk> ParserPipe(input, <jk>false</jk>, <jk>false</jk>, <jk>false</jk>, <jk>false</jk>, <jk>null</jk>, <jk>null</jk>);</code>
	 *
	 * @param input The input object.
	 */
	public ParserPipe(Object input) {
		this(input, false, false, false, false, null, null);
	}

	/**
	 * Wraps the specified input object inside an input stream.
	 *
	 * <p>
	 * In debug mode, {@link InputStream} and {@link File} inputs are read fully into memory so that a hex dump of the
	 * contents is available through {@link #getInputAsString()}.
	 *
	 * @return The input object wrapped in an input stream, or <jk>null</jk> if the object is null.
	 * @throws IOException If object could not be converted to an input stream.
	 */
	public InputStream getInputStream() throws IOException {
		if (input == null)
			return null;

		if (input instanceof InputStream) {
			if (debug) {
				// NOTE(review): the caller's stream is drained here and doClose is left untouched, so whether it gets
				// closed depends on IOUtils.readBytes(...) - confirm.
				byte[] b = readBytes((InputStream)input, 1024);
				inputString = toHex(b);
				inputStream = new ByteArrayInputStream(b);
			} else {
				inputStream = (InputStream)input;
				doClose = autoCloseStreams;
			}
		} else if (input instanceof byte[]) {
			if (debug)
				inputString = toHex((byte[])input);
			inputStream = new ByteArrayInputStream((byte[])input);
			doClose = false;
		} else if (input instanceof String) {
			inputString = (String)input;
			inputStream = new ByteArrayInputStream(convertFromString((String)input));
			doClose = false;
		} else if (input instanceof File) {
			if (debug) {
				byte[] b = readBytes((File)input);
				inputString = toHex(b);
				inputStream = new ByteArrayInputStream(b);
			} else {
				// We opened this stream ourselves, so we always own closing it.
				inputStream = new FileInputStream((File)input);
				doClose = true;
			}
		} else {
			throw new IOException("Cannot convert object of type "+input.getClass().getName()+" to an InputStream.");
		}

		return inputStream;
	}

	/**
	 * Decodes a string input into bytes according to the binary format of this pipe.
	 *
	 * @param in The encoded string.
	 * @return The decoded bytes, or an empty array if the binary format is not one handled here.
	 */
	private byte[] convertFromString(String in) {
		// Pipes created through the reader-based constructors carry no binary format.
		// Switching on null would throw a NullPointerException, so fall back to hex, which is the string
		// convention documented on this class for stream-based parsers.
		BinaryFormat bf = (binaryFormat == null ? BinaryFormat.HEX : binaryFormat);
		switch(bf) {
			case BASE64: return base64Decode(in);
			case HEX: return fromHex(in);
			case SPACED_HEX: return fromSpacedHex(in);
			default: return new byte[0];
		}
	}

	/**
	 * Wraps the specified input object inside a reader.
	 *
	 * <p>
	 * In debug mode, the input is read fully into memory so that the contents are available through
	 * {@link #getInputAsString()}.
	 *
	 * @return The input object wrapped in a Reader, or <jk>null</jk> if the object is null.
	 * @throws IOException If object could not be converted to a reader.
	 */
	public Reader getReader() throws IOException {
		if (input == null)
			return null;

		if (input instanceof Reader) {
			if (debug) {
				// NOTE(review): doClose is left untouched on this path, so whether the caller's reader gets closed
				// depends on IOUtils.read(Reader) - confirm.
				inputString = read((Reader)input);
				reader = new StringReader(inputString);
			} else {
				reader = (Reader)input;
				doClose = autoCloseStreams;
			}
		} else if (input instanceof CharSequence) {
			inputString = input.toString();
			reader = new ParserReader(this);
			doClose = false;
		} else if (input instanceof InputStream || input instanceof byte[]) {
			// Only streams handed to us by the caller are subject to the autoCloseStreams flag.
			doClose = input instanceof InputStream && autoCloseStreams;
			InputStream is = (
				input instanceof InputStream
				? (InputStream)input
				: new ByteArrayInputStream((byte[])input)
			);
			reader = new InputStreamReader(is, newDecoder(inputStreamCharset));
			if (debug) {
				inputString = read(reader);
				reader = new StringReader(inputString);
			}
		} else if (input instanceof File) {
			// Resolve the charset before opening the file so that a bad charset name cannot leak a file handle.
			CharsetDecoder cd = newDecoder(fileCharset);
			reader = new InputStreamReader(new FileInputStream((File)input), cd);
			if (debug) {
				inputString = read(reader);
				reader = new StringReader(inputString);
			}
			doClose = true;
		} else {
			throw new IOException("Cannot convert object of type "+input.getClass().getName()+" to a Reader.");
		}

		return reader;
	}

	/**
	 * Creates a decoder for the specified charset, configured according to the <code>strict</code> flag.
	 *
	 * @param charsetName
	 * 	The charset name.
	 * 	A value of <jk>null</jk> or <js>"default"</js> (case-insensitive) resolves to {@link Charset#defaultCharset()}.
	 * @return
	 * 	A new decoder that reports malformed/unmappable input if <code>strict</code> is <jk>true</jk>, or replaces
	 * 	it otherwise.
	 */
	private CharsetDecoder newDecoder(String charsetName) {
		// Charset.forName(null) throws IllegalArgumentException; the shortcut constructor passes null charsets,
		// so treat null the same as "default".
		Charset cs = (
			charsetName == null || "default".equalsIgnoreCase(charsetName)
			? Charset.defaultCharset()
			: Charset.forName(charsetName)
		);
		CodingErrorAction action = (strict ? CodingErrorAction.REPORT : CodingErrorAction.REPLACE);
		return cs.newDecoder().onMalformedInput(action).onUnmappableCharacter(action);
	}

	/**
	 * Returns the contents of this pipe as a buffered reader.
	 *
	 * <p>
	 * Delegates to {@link IOUtils#getBufferedReader(Reader)} using the reader returned by {@link #getReader()}.
	 *
	 * @return The contents of this pipe as a buffered reader.
	 * @throws Exception If the input could not be converted to a reader.
	 */
	public Reader getBufferedReader() throws Exception {
		return IOUtils.getBufferedReader(getReader());
	}

	/**
	 * Returns the input to this parser as a plain string.
	 *
	 * <p>
	 * The value is always available when the input is a {@link CharSequence}.
	 * For other input types, it is only captured in debug mode, and only once {@link #getReader()} or
	 * {@link #getInputStream()} has been called (binary input is captured as a hex string).
	 *
	 * @return The input as a string, or <jk>null</jk> if no string form of the input has been captured.
	 */
	public String getInputAsString() {
		return inputString;
	}

	/**
	 * Converts this pipe into a {@link ParserReader}.
	 *
	 * <p>
	 * If the input is already a {@link ParserReader}, it is returned as-is.
	 *
	 * @return The converted pipe, or <jk>null</jk> if the input is <jk>null</jk>.
	 * @throws Exception If the parser reader could not be created.
	 */
	public ParserReader getParserReader() throws Exception {
		if (input == null)
			return null;
		if (input instanceof ParserReader)
			parserReader = (ParserReader)input;
		else
			parserReader = new ParserReader(this);
		return parserReader;
	}

	/**
	 * Returns <jk>true</jk> if a string form of the input is available.
	 *
	 * <p>
	 * This is the case when the contents passed into this pipe was a {@link CharSequence}, and also once the input
	 * has been captured as a string in debug mode.
	 *
	 * @return <jk>true</jk> if {@link #getInputAsString()} would return a non-null value.
	 */
	public boolean isString() {
		return inputString != null;
	}

	/**
	 * Sets the ParserReader/ParserInputStream/XmlReader constructed from this pipe.
	 *
	 * <p>
	 * Used for gathering the failure position when {@link ParseException} is thrown.
	 *
	 * @param positionable The ParserReader/ParserInputStream/XmlReader constructed from this pipe.
	 */
	public void setPositionable(Positionable positionable) {
		this.positionable = positionable;
	}

	/**
	 * Returns the current position reported by the registered {@link Positionable}.
	 *
	 * @return The current position, or {@link Position#UNKNOWN} if no positionable is set or it reports no position.
	 */
	Position getPosition() {
		if (positionable == null)
			return Position.UNKNOWN;
		Position p = positionable.getPosition();
		if (p == null)
			return Position.UNKNOWN;
		return p;
	}

	/**
	 * Closes the reader and input stream created by this pipe, but only if this pipe is responsible for closing them.
	 *
	 * @throws BeanRuntimeException If the underlying reader or stream could not be closed.
	 */
	@Override /* Closeable */
	public void close() {
		try {
			if (doClose)
				IOUtils.close(reader, inputStream);
		} catch (IOException e) {
			throw new BeanRuntimeException(e);
		}
	}
}