001// *************************************************************************************************************************** 002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file * 003// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * 004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance * 005// * with the License. You may obtain a copy of the License at * 006// * * 007// * http://www.apache.org/licenses/LICENSE-2.0 * 008// * * 009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an * 010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * 011// * specific language governing permissions and limitations under the License. * 012// *************************************************************************************************************************** 013package org.apache.juneau.parser; 014 015import java.io.*; 016import java.lang.reflect.*; 017import java.util.*; 018 019import org.apache.juneau.*; 020import org.apache.juneau.annotation.*; 021import org.apache.juneau.collections.*; 022import org.apache.juneau.html.*; 023import org.apache.juneau.http.*; 024import org.apache.juneau.json.*; 025import org.apache.juneau.msgpack.*; 026import org.apache.juneau.transform.*; 027import org.apache.juneau.transforms.*; 028import org.apache.juneau.uon.*; 029import org.apache.juneau.utils.*; 030import org.apache.juneau.xml.*; 031 032/** 033 * Parent class for all Juneau parsers. 034 * 035 * <h5 class='topic'>Valid data conversions</h5> 036 * 037 * Parsers can parse any parsable POJO types, as specified in the {@doc PojoCategories}. 038 * 039 * <p> 040 * Some examples of conversions are shown below... 
041 * </p> 042 * <table class='styled'> 043 * <tr> 044 * <th>Data type</th> 045 * <th>Class type</th> 046 * <th>JSON example</th> 047 * <th>XML example</th> 048 * <th>Class examples</th> 049 * </tr> 050 * <tr> 051 * <td>object</td> 052 * <td>Maps, Java beans</td> 053 * <td class='code'>{name:<js>'John Smith'</js>,age:21}</td> 054 * <td class='code'><xt>&lt;object&gt; 055 * &lt;name</xt> <xa>type</xa>=<xs>'string'</xs><xt>&gt;</xt>John Smith<xt>&lt;/name&gt; 056 * &lt;age</xt> <xa>type</xa>=<xs>'number'</xs><xt>&gt;</xt>21<xt>&lt;/age&gt; 057 * &lt;/object&gt;</xt></td> 058 * <td class='code'>HashMap, TreeMap&lt;String,Integer&gt;</td> 059 * </tr> 060 * <tr> 061 * <td>array</td> 062 * <td>Collections, Java arrays</td> 063 * <td class='code'>[1,2,3]</td> 064 * <td class='code'><xt>&lt;array&gt; 065 * &lt;number&gt;</xt>1<xt>&lt;/number&gt; 066 * &lt;number&gt;</xt>2<xt>&lt;/number&gt; 067 * &lt;number&gt;</xt>3<xt>&lt;/number&gt; 068 * &lt;/array&gt;</xt></td> 069 * <td class='code'>List&lt;Integer&gt;, <jk>int</jk>[], Float[], Set&lt;Person&gt;</td> 070 * </tr> 071 * <tr> 072 * <td>number</td> 073 * <td>Numbers</td> 074 * <td class='code'>123</td> 075 * <td class='code'><xt>&lt;number&gt;</xt>123<xt>&lt;/number&gt;</xt></td> 076 * <td class='code'>Integer, Long, Float, <jk>int</jk></td> 077 * </tr> 078 * <tr> 079 * <td>boolean</td> 080 * <td>Booleans</td> 081 * <td class='code'><jk>true</jk></td> 082 * <td class='code'><xt>&lt;boolean&gt;</xt>true<xt>&lt;/boolean&gt;</xt></td> 083 * <td class='code'>Boolean</td> 084 * </tr> 085 * <tr> 086 * <td>string</td> 087 * <td>CharSequences</td> 088 * <td class='code'><js>'foobar'</js></td> 089 * <td class='code'><xt>&lt;string&gt;</xt>foobar<xt>&lt;/string&gt;</xt></td> 090 * <td class='code'>String, StringBuilder</td> 091 * </tr> 092 * </table> 093 * 094 * <p> 095 * In addition, any class types with {@link PojoSwap PojoSwaps} associated with them on the registered 096 * bean context can also be passed in. 
 *
 * <p>
 * For example, the {@link TemporalCalendarSwap} transform can be used to generalize {@code Calendar} objects to
 * {@code String} objects.
 * When registered with this parser, you can construct {@code Calendar} objects from {@code Strings} using the
 * following syntax...
 * <p class='bcode w800'>
 * 	Calendar c = parser.parse(<js>"'Sun Mar 03 04:05:06 EST 2001'"</js>, GregorianCalendar.<jk>class</jk>);
 * </p>
 *
 * <p>
 * If <code>Object.<jk>class</jk></code> is specified as the target type, then the parser automatically determines the
 * data types and generates the following object types...
 * <table class='styled'>
 * 	<tr><th>JSON type</th><th>Class type</th></tr>
 * 	<tr><td>object</td><td>{@link OMap}</td></tr>
 * 	<tr><td>array</td><td>{@link OList}</td></tr>
 * 	<tr><td>number</td><td>{@link Number}<br>(depending on length and format, could be {@link Integer},
 * 		{@link Double}, {@link Float}, etc...)</td></tr>
 * 	<tr><td>boolean</td><td>{@link Boolean}</td></tr>
 * 	<tr><td>string</td><td>{@link String}</td></tr>
 * </table>
 */
@ConfigurableContext
public abstract class Parser extends BeanContext {

	/**
	 * Represents no Parser.
	 *
	 * <p>
	 * The class is abstract and its only constructor is private, so it cannot be instantiated.
	 * NOTE(review): presumably used purely as a placeholder class value (e.g. an annotation default) - confirm.
	 */
	public static abstract class Null extends Parser {
		// Private on purpose: simply forwards to the Parser constructor and is never reachable from outside.
		private Null(PropertyStore ps, String[] consumes) {
			super(ps, consumes);
		}
	}

	//-------------------------------------------------------------------------------------------------------------------
	// Configurable properties
	//-------------------------------------------------------------------------------------------------------------------

	/** Prefix prepended to the name of every configuration property defined in this class. */
	static final String PREFIX = "Parser";

	/**
	 * Configuration property:  Auto-close streams.
	 *
	 * <h5 class='section'>Property:</h5>
	 * <ul class='spaced-list'>
	 * 	<li><b>ID:</b>  {@link org.apache.juneau.parser.Parser#PARSER_autoCloseStreams PARSER_autoCloseStreams}
	 * 	<li><b>Name:</b>  <js>"Parser.autoCloseStreams.b"</js>
	 * 	<li><b>Data type:</b>  <jk>boolean</jk>
	 * 	<li><b>System property:</b>  <c>Parser.autoCloseStreams</c>
	 * 	<li><b>Environment variable:</b>  <c>PARSER_AUTOCLOSESTREAMS</c>
	 * 	<li><b>Default:</b>  <jk>false</jk>
	 * 	<li><b>Session property:</b>  <jk>false</jk>
	 * 	<li><b>Annotations:</b>
	 * 		<ul>
	 * 			<li class='ja'>{@link org.apache.juneau.parser.annotation.ParserConfig#autoCloseStreams()}
	 * 		</ul>
	 * 	<li><b>Methods:</b>
	 * 		<ul>
	 * 			<li class='jm'>{@link org.apache.juneau.parser.ParserBuilder#autoCloseStreams()}
	 * 		</ul>
	 * </ul>
	 *
	 * <h5 class='section'>Description:</h5>
	 *
	 * <p>
	 * When enabled, <l>InputStreams</l> and <l>Readers</l> passed into parsers will be closed
	 * after parsing is complete.
	 *
	 * <h5 class='section'>Example:</h5>
	 * <p class='bcode w800'>
	 * 	<jc>// Create a parser that closes its input once parsing is complete.</jc>
	 * 	ReaderParser p = JsonParser
	 * 		.<jsm>create</jsm>()
	 * 		.autoCloseStreams()
	 * 		.build();
	 *
	 * 	<jc>// Same, but use property.</jc>
	 * 	ReaderParser p = JsonParser
	 * 		.<jsm>create</jsm>()
	 * 		.set(<jsf>PARSER_autoCloseStreams</jsf>, <jk>true</jk>)
	 * 		.build();
	 *
	 * 	Reader r = <jk>new</jk> FileReader(<js>"/tmp/myfile.json"</js>);
	 * 	MyBean myBean = p.parse(r, MyBean.<jk>class</jk>);
	 *
	 * 	<jsm>assertTrue</jsm>(r.isClosed());
	 * </p>
	 */
	public static final String PARSER_autoCloseStreams = PREFIX + ".autoCloseStreams.b";

	/**
	 * Configuration property:  Debug output lines.
190 * 191 * <h5 class='section'>Property:</h5> 192 * <ul class='spaced-list'> 193 * <li><b>ID:</b> {@link org.apache.juneau.parser.Parser#PARSER_debugOutputLines PARSER_debugOutputLines} 194 * <li><b>Name:</b> <js>"Parser.debugOutputLines.i"</js> 195 * <li><b>Data type:</b> <jk>int</jk> 196 * <li><b>System property:</b> <c>Parser.debugOutputLines</c> 197 * <li><b>Environment variable:</b> <c>PARSER_DEBUGOUTPUTLINES</c> 198 * <li><b>Default:</b> <c>5</c> 199 * <li><b>Session property:</b> <jk>false</jk> 200 * <li><b>Annotations:</b> 201 * <ul> 202 * <li class='ja'>{@link org.apache.juneau.parser.annotation.ParserConfig#debugOutputLines()} 203 * </ul> 204 * <li><b>Methods:</b> 205 * <ul> 206 * <li class='jm'>{@link org.apache.juneau.parser.ParserBuilder#debugOutputLines(int)} 207 * </ul> 208 * </ul> 209 * 210 * <h5 class='section'>Description:</h5> 211 * 212 * <p> 213 * When parse errors occur, this specifies the number of lines of input before and after the 214 * error location to be printed as part of the exception message. 
	 *
	 * <h5 class='section'>Example:</h5>
	 * <p class='bcode w800'>
	 * 	<jc>// Create a parser whose exceptions print out 100 lines before and after the parse error location.</jc>
	 * 	ReaderParser p = JsonParser
	 * 		.<jsm>create</jsm>()
	 * 		.debug()  <jc>// Enable debug mode to capture Reader contents as strings.</jc>
	 * 		.debugOutputLines(100)
	 * 		.build();
	 *
	 * 	<jc>// Same, but use property.</jc>
	 * 	ReaderParser p = JsonParser
	 * 		.<jsm>create</jsm>()
	 * 		.set(<jsf>BEAN_debug</jsf>, <jk>true</jk>)
	 * 		.set(<jsf>PARSER_debugOutputLines</jsf>, 100)
	 * 		.build();
	 *
	 * 	Reader r = <jk>new</jk> FileReader(<js>"/tmp/mybadfile.json"</js>);
	 * 	<jk>try</jk> {
	 * 		p.parse(r, Object.<jk>class</jk>);
	 * 	} <jk>catch</jk> (ParseException e) {
	 * 		System.<jsf>err</jsf>.println(e.getMessage());  <jc>// Will display 200 lines of the output.</jc>
	 * 	}
	 * </p>
	 */
	public static final String PARSER_debugOutputLines = PREFIX + ".debugOutputLines.i";

	/**
	 * Configuration property:  Parser listener.
	 *
	 * <h5 class='section'>Property:</h5>
	 * <ul class='spaced-list'>
	 * 	<li><b>ID:</b>  {@link org.apache.juneau.parser.Parser#PARSER_listener PARSER_listener}
	 * 	<li><b>Name:</b>  <js>"Parser.listener.c"</js>
	 * 	<li><b>Data type:</b>  <c>Class&lt;{@link org.apache.juneau.parser.ParserListener}&gt;</c>
	 * 	<li><b>Default:</b>  <jk>null</jk>
	 * 	<li><b>Session property:</b>  <jk>false</jk>
	 * 	<li><b>Annotations:</b>
	 * 		<ul>
	 * 			<li class='ja'>{@link org.apache.juneau.parser.annotation.ParserConfig#listener()}
	 * 		</ul>
	 * 	<li><b>Methods:</b>
	 * 		<ul>
	 * 			<li class='jm'>{@link org.apache.juneau.parser.ParserBuilder#listener(Class)}
	 * 		</ul>
	 * </ul>
	 *
	 * <h5 class='section'>Description:</h5>
	 *
	 * <p>
	 * Class used to listen for errors and warnings that occur during parsing.
	 *
	 * <h5 class='section'>Example:</h5>
	 * <p class='bcode w800'>
	 * 	<jc>// Define our parser listener.</jc>
	 * 	<jc>// Simply captures all unknown bean property events.</jc>
	 * 	<jk>public class</jk> MyParserListener <jk>extends</jk> ParserListener {
	 *
	 * 		<jc>// A simple property to store our events.</jc>
	 * 		<jk>public</jk> List&lt;String&gt; <jf>events</jf> = <jk>new</jk> LinkedList&lt;&gt;();
	 *
	 * 		<ja>@Override</ja>
	 * 		<jk>public</jk> &lt;T&gt; <jk>void</jk> onUnknownBeanProperty(ParserSession session, ParserPipe pipe, String propertyName, Class&lt;T&gt; beanClass, T bean, <jk>int</jk> line, <jk>int</jk> col) {
	 * 			<jf>events</jf>.add(propertyName + <js>","</js> + line + <js>","</js> + col);
	 * 		}
	 * 	}
	 *
	 * 	<jc>// Create a parser using our listener.</jc>
	 * 	ReaderParser p = JsonParser
	 * 		.<jsm>create</jsm>()
	 * 		.listener(MyParserListener.<jk>class</jk>)
	 * 		.build();
	 *
	 * 	<jc>// Same, but use property.</jc>
	 * 	ReaderParser p = JsonParser
	 * 		.<jsm>create</jsm>()
	 * 		.set(<jsf>PARSER_listener</jsf>, MyParserListener.<jk>class</jk>)
	 * 		.build();
	 *
	 * 	<jc>// Create a session object.</jc>
	 * 	<jc>// Needed because listeners are created per-session.</jc>
	 * 	<jk>try</jk> (ReaderParserSession s = p.createSession()) {
	 *
	 * 		<jc>// Parse some JSON object.</jc>
	 * 		MyBean myBean = s.parse(<js>"{...}"</js>, MyBean.<jk>class</jk>);
	 *
	 * 		<jc>// Get the listener.</jc>
	 * 		MyParserListener l = s.getListener(MyParserListener.<jk>class</jk>);
	 *
	 * 		<jc>// Dump the results to the console.</jc>
	 * 		SimpleJsonSerializer.<jsf>DEFAULT</jsf>.println(l.<jf>events</jf>);
	 * 	}
	 * </p>
	 */
	public static final String PARSER_listener = PREFIX + ".listener.c";

	/**
	 * Configuration property:  Strict mode.
313 * 314 * <h5 class='section'>Property:</h5> 315 * <ul class='spaced-list'> 316 * <li><b>ID:</b> {@link org.apache.juneau.parser.Parser#PARSER_strict PARSER_strict} 317 * <li><b>Name:</b> <js>"Parser.strict.b"</js> 318 * <li><b>Data type:</b> <jk>boolean</jk> 319 * <li><b>System property:</b> <c>Parser.strict</c> 320 * <li><b>Environment variable:</b> <c>PARSER_STRICT</c> 321 * <li><b>Default:</b> <jk>false</jk> 322 * <li><b>Session property:</b> <jk>false</jk> 323 * <li><b>Annotations:</b> 324 * <ul> 325 * <li class='ja'>{@link org.apache.juneau.parser.annotation.ParserConfig#strict()} 326 * </ul> 327 * <li><b>Methods:</b> 328 * <ul> 329 * <li class='jm'>{@link org.apache.juneau.parser.ParserBuilder#strict()} 330 * </ul> 331 * </ul> 332 * 333 * <h5 class='section'>Description:</h5> 334 * 335 * <p> 336 * When enabled, strict mode for the parser is enabled. 337 * 338 * <p> 339 * Strict mode can mean different things for different parsers. 340 * 341 * <table class='styled'> 342 * <tr><th>Parser class</th><th>Strict behavior</th></tr> 343 * <tr> 344 * <td>All reader-based parsers</td> 345 * <td> 346 * When enabled, throws {@link ParseException ParseExceptions} on malformed charset input. 347 * Otherwise, malformed input is ignored. 348 * </td> 349 * </tr> 350 * <tr> 351 * <td>{@link JsonParser}</td> 352 * <td> 353 * When enabled, throws exceptions on the following invalid JSON syntax: 354 * <ul> 355 * <li>Unquoted attributes. 356 * <li>Missing attribute values. 357 * <li>Concatenated strings. 358 * <li>Javascript comments. 359 * <li>Numbers and booleans when Strings are expected. 360 * <li>Numbers valid in Java but not JSON (e.g. octal notation, etc...) 
	 * 			</ul>
	 * 		</td>
	 * 	</tr>
	 * </table>
	 *
	 * <h5 class='section'>Example:</h5>
	 * <p class='bcode w800'>
	 * 	<jc>// Create a parser using strict mode.</jc>
	 * 	ReaderParser p = JsonParser
	 * 		.<jsm>create</jsm>()
	 * 		.strict()
	 * 		.build();
	 *
	 * 	<jc>// Same, but use property.</jc>
	 * 	ReaderParser p = JsonParser
	 * 		.<jsm>create</jsm>()
	 * 		.set(<jsf>PARSER_strict</jsf>, <jk>true</jk>)
	 * 		.build();
	 *
	 * 	<jc>// Use it.</jc>
	 * 	<jk>try</jk> {
	 * 		MyBean myBean = p.parse(<js>"{unquotedAttr:'value'}"</js>, MyBean.<jk>class</jk>);
	 * 	} <jk>catch</jk> (ParseException e) {
	 * 		<jsm>assertTrue</jsm>(e.getMessage().contains(<js>"Unquoted attribute detected."</js>));
	 * 	}
	 * </p>
	 */
	public static final String PARSER_strict = PREFIX + ".strict.b";

	/**
	 * Configuration property:  Trim parsed strings.
	 *
	 * <h5 class='section'>Property:</h5>
	 * <ul class='spaced-list'>
	 * 	<li><b>ID:</b>  {@link org.apache.juneau.parser.Parser#PARSER_trimStrings PARSER_trimStrings}
	 * 	<li><b>Name:</b>  <js>"Parser.trimStrings.b"</js>
	 * 	<li><b>Data type:</b>  <jk>boolean</jk>
	 * 	<li><b>System property:</b>  <c>Parser.trimStrings</c>
	 * 	<li><b>Environment variable:</b>  <c>PARSER_TRIMSTRINGS</c>
	 * 	<li><b>Default:</b>  <jk>false</jk>
	 * 	<li><b>Session property:</b>  <jk>false</jk>
	 * 	<li><b>Annotations:</b>
	 * 		<ul>
	 * 			<li class='ja'>{@link org.apache.juneau.parser.annotation.ParserConfig#trimStrings()}
	 * 		</ul>
	 * 	<li><b>Methods:</b>
	 * 		<ul>
	 * 			<li class='jm'>{@link org.apache.juneau.parser.ParserBuilder#trimStrings()}
	 * 		</ul>
	 * </ul>
	 *
	 * <h5 class='section'>Description:</h5>
	 *
	 * <p>
	 * When enabled, string values will be trimmed of whitespace using {@link String#trim()} before being added to
	 * the POJO.
	 *
	 * <h5 class='section'>Example:</h5>
	 * <p class='bcode w800'>
	 * 	<jc>// Create a parser with trim-strings enabled.</jc>
	 * 	ReaderParser p = JsonParser
	 * 		.<jsm>create</jsm>()
	 * 		.trimStrings()
	 * 		.build();
	 *
	 * 	<jc>// Same, but use property.</jc>
	 * 	ReaderParser p = JsonParser
	 * 		.<jsm>create</jsm>()
	 * 		.set(<jsf>PARSER_trimStrings</jsf>, <jk>true</jk>)
	 * 		.build();
	 *
	 * 	<jc>// Use it.</jc>
	 * 	String json = <js>"{' foo ':' bar '}"</js>;
	 * 	Map&lt;String,String&gt; map = p.parse(json, HashMap.<jk>class</jk>, String.<jk>class</jk>, String.<jk>class</jk>);
	 *
	 * 	<jc>// Make sure strings are trimmed.</jc>
	 * 	<jsm>assertEquals</jsm>(<js>"bar"</js>, map.get(<js>"foo"</js>));
	 * </p>
	 */
	public static final String PARSER_trimStrings = PREFIX + ".trimStrings.b";

	/**
	 * Configuration property:  Unbuffered.
	 *
	 * <h5 class='section'>Property:</h5>
	 * <ul class='spaced-list'>
	 * 	<li><b>ID:</b>  {@link org.apache.juneau.parser.Parser#PARSER_unbuffered PARSER_unbuffered}
	 * 	<li><b>Name:</b>  <js>"Parser.unbuffered.b"</js>
	 * 	<li><b>Data type:</b>  <jk>boolean</jk>
	 * 	<li><b>System property:</b>  <c>Parser.unbuffered</c>
	 * 	<li><b>Environment variable:</b>  <c>PARSER_UNBUFFERED</c>
	 * 	<li><b>Default:</b>  <jk>false</jk>
	 * 	<li><b>Session property:</b>  <jk>false</jk>
	 * 	<li><b>Annotations:</b>
	 * 		<ul>
	 * 			<li class='ja'>{@link org.apache.juneau.parser.annotation.ParserConfig#unbuffered()}
	 * 		</ul>
	 * 	<li><b>Methods:</b>
	 * 		<ul>
	 * 			<li class='jm'>{@link org.apache.juneau.parser.ParserBuilder#unbuffered()}
	 * 		</ul>
	 * </ul>
	 *
	 * <h5 class='section'>Description:</h5>
	 *
	 * <p>
	 * When enabled, don't use internal buffering during parsing.
	 *
	 * <p>
	 * This is useful in cases when you want to parse the same input stream or reader multiple times
	 * because it may contain multiple independent POJOs to parse.
472 * <br>Buffering would cause the parser to read past the current POJO in the stream. 473 * 474 * <h5 class='section'>Example:</h5> 475 * <p class='bcode w800'> 476 * <jc>// Create a parser with internal buffering disabled.</jc> 477 * ReaderParser p = JsonParser 478 * .<jsm>create</jsm>() 479 * .unbuffered() 480 * .build(); 481 * 482 * <jc>// Same, but use property.</jc> 483 * ReaderParser p = JsonParser 484 * .<jsm>create</jsm>() 485 * .set(<jsf>PARSER_unbuffered</jsf>, <jk>true</jk>) 486 * .build(); 487 * 488 * <jc>// If you're calling parse on the same input multiple times, use a session instead of the parser directly.</jc> 489 * <jc>// It's more efficient because we don't need to recalculate the session settings again. </jc> 490 * ReaderParserSession s = p.createSession(); 491 * 492 * <jc>// Read input with multiple POJOs</jc> 493 * Reader json = <jk>new</jk> StringReader(<js>"{foo:'bar'}{foo:'baz'}"</js>); 494 * MyBean myBean1 = s.parse(json, MyBean.<jk>class</jk>); 495 * MyBean myBean2 = s.parse(json, MyBean.<jk>class</jk>); 496 * </p> 497 * 498 * <ul class='notes'> 499 * <li> 500 * This only allows for multi-input streams for the following parsers: 501 * <ul> 502 * <li class='jc'>{@link JsonParser} 503 * <li class='jc'>{@link UonParser} 504 * </ul> 505 * It has no effect on the following parsers: 506 * <ul> 507 * <li class='jc'>{@link MsgPackParser} - It already doesn't use buffering. 508 * <li class='jc'>{@link XmlParser}, {@link HtmlParser} - These use StAX which doesn't allow for more than one root element anyway. 509 * <li>RDF parsers - These read everything into an internal model before any parsing begins. 
510 * </ul> 511 * </ul> 512 */ 513 public static final String PARSER_unbuffered = PREFIX + ".unbuffered.b"; 514 515 static Parser DEFAULT = new Parser(PropertyStore.create().build()) { 516 @Override 517 public ParserSession createSession(ParserSessionArgs args) { 518 throw new NoSuchMethodError(); 519 } 520 }; 521 522 //------------------------------------------------------------------------------------------------------------------- 523 // Instance 524 //------------------------------------------------------------------------------------------------------------------- 525 526 private final boolean trimStrings, strict, autoCloseStreams, unbuffered; 527 private final int debugOutputLines; 528 private final Class<? extends ParserListener> listener; 529 530 /** General parser properties currently set on this parser. */ 531 private final MediaType[] consumes; 532 533 /** 534 * Constructor. 535 * 536 * @param ps The property store containing all the settings for this object. 537 * @param consumes The list of media types that this parser consumes (e.g. <js>"application/json"</js>). 538 */ 539 protected Parser(PropertyStore ps, String...consumes) { 540 super(ps); 541 542 trimStrings = getBooleanProperty(PARSER_trimStrings, false); 543 strict = getBooleanProperty(PARSER_strict, false); 544 autoCloseStreams = getBooleanProperty(PARSER_autoCloseStreams, false); 545 debugOutputLines = getIntegerProperty(PARSER_debugOutputLines, 5); 546 unbuffered = getBooleanProperty(PARSER_unbuffered, false); 547 listener = getClassProperty(PARSER_listener, ParserListener.class, null); 548 this.consumes = new MediaType[consumes.length]; 549 for (int i = 0; i < consumes.length; i++) { 550 this.consumes[i] = MediaType.of(consumes[i]); 551 } 552 } 553 554 @Override /* Context */ 555 public ParserBuilder builder() { 556 return new ParserBuilder(getPropertyStore()); 557 } 558 559 /** 560 * Instantiates a new clean-slate {@link ParserBuilder} object. 
	 *
	 * <p>
	 * This is equivalent to simply calling <code><jk>new</jk> ParserBuilder()</code>.
	 *
	 * <p>
	 * Note that this method creates a builder initialized to all default settings, whereas {@link #builder()} copies
	 * the settings of the object called on.
	 *
	 * @return A new {@link ParserBuilder} object.
	 */
	public static ParserBuilder create() {
		// Seeded with the shared default property store rather than this class's own settings.
		return new ParserBuilder(PropertyStore.DEFAULT);
	}


	//-----------------------------------------------------------------------------------------------------------------
	// Abstract methods
	//-----------------------------------------------------------------------------------------------------------------

	/**
	 * Returns <jk>true</jk> if this parser subclasses from {@link ReaderParser}.
	 *
	 * <p>
	 * This base implementation always returns <jk>true</jk>.
	 * NOTE(review): presumably overridden to return <jk>false</jk> by stream-based parsers - confirm.
	 *
	 * @return <jk>true</jk> if this parser subclasses from {@link ReaderParser}.
	 */
	public boolean isReaderParser() {
		return true;
	}

	/**
	 * Create the session object that will be passed in to the parse method.
	 *
	 * <p>
	 * It's up to implementers to decide what the session object looks like, although typically it's going to be a
	 * subclass of {@link ParserSession}.
	 *
	 * @param args
	 * 	Runtime arguments.
	 * @return The new session.
	 */
	public abstract ParserSession createSession(ParserSessionArgs args);


	//-----------------------------------------------------------------------------------------------------------------
	// Other methods
	//-----------------------------------------------------------------------------------------------------------------

	/**
	 * Parses input into the specified object type.
	 *
	 * <p>
	 * The type can be a simple type (e.g. beans, strings, numbers) or parameterized type (collections/maps).
612 * 613 * <h5 class='section'>Examples:</h5> 614 * <p class='bcode w800'> 615 * ReaderParser p = JsonParser.<jsf>DEFAULT</jsf>; 616 * 617 * <jc>// Parse into a linked-list of strings.</jc> 618 * List l = p.parse(json, LinkedList.<jk>class</jk>, String.<jk>class</jk>); 619 * 620 * <jc>// Parse into a linked-list of beans.</jc> 621 * List l = p.parse(json, LinkedList.<jk>class</jk>, MyBean.<jk>class</jk>); 622 * 623 * <jc>// Parse into a linked-list of linked-lists of strings.</jc> 624 * List l = p.parse(json, LinkedList.<jk>class</jk>, LinkedList.<jk>class</jk>, String.<jk>class</jk>); 625 * 626 * <jc>// Parse into a map of string keys/values.</jc> 627 * Map m = p.parse(json, TreeMap.<jk>class</jk>, String.<jk>class</jk>, String.<jk>class</jk>); 628 * 629 * <jc>// Parse into a map containing string keys and values of lists containing beans.</jc> 630 * Map m = p.parse(json, TreeMap.<jk>class</jk>, String.<jk>class</jk>, List.<jk>class</jk>, MyBean.<jk>class</jk>); 631 * </p> 632 * 633 * <p> 634 * <c>Collection</c> classes are assumed to be followed by zero or one objects indicating the element type. 635 * 636 * <p> 637 * <c>Map</c> classes are assumed to be followed by zero or two meta objects indicating the key and value types. 638 * 639 * <p> 640 * The array can be arbitrarily long to indicate arbitrarily complex data structures. 641 * 642 * <ul class='notes'> 643 * <li> 644 * Use the {@link #parse(Object, Class)} method instead if you don't need a parameterized map/collection. 645 * </ul> 646 * 647 * @param <T> The class type of the object to create. 648 * @param input 649 * The input. 650 * <br>Character-based parsers can handle the following input class types: 651 * <ul> 652 * <li><jk>null</jk> 653 * <li>{@link Reader} 654 * <li>{@link CharSequence} 655 * <li>{@link InputStream} containing UTF-8 encoded text (or charset defined by 656 * {@link ReaderParser#RPARSER_streamCharset} property value). 
657 * <li><code><jk>byte</jk>[]</code> containing UTF-8 encoded text (or charset defined by 658 * {@link ReaderParser#RPARSER_streamCharset} property value). 659 * <li>{@link File} containing system encoded text (or charset defined by 660 * {@link ReaderParser#RPARSER_fileCharset} property value). 661 * </ul> 662 * <br>Stream-based parsers can handle the following input class types: 663 * <ul> 664 * <li><jk>null</jk> 665 * <li>{@link InputStream} 666 * <li><code><jk>byte</jk>[]</code> 667 * <li>{@link File} 668 * <li>{@link CharSequence} containing encoded bytes according to the {@link InputStreamParser#ISPARSER_binaryFormat} setting. 669 * </ul> 670 * @param type 671 * The object type to create. 672 * <br>Can be any of the following: {@link ClassMeta}, {@link Class}, {@link ParameterizedType}, {@link GenericArrayType} 673 * @param args 674 * The type arguments of the class if it's a collection or map. 675 * <br>Can be any of the following: {@link ClassMeta}, {@link Class}, {@link ParameterizedType}, {@link GenericArrayType} 676 * <br>Ignored if the main type is not a map or collection. 677 * @return The parsed object. 678 * @throws ParseException Malformed input encountered. 679 * @throws IOException Thrown by underlying stream. 680 * @see BeanSession#getClassMeta(Type,Type...) for argument syntax for maps and collections. 681 */ 682 public final <T> T parse(Object input, Type type, Type...args) throws ParseException, IOException { 683 return createSession().parse(input, type, args); 684 } 685 686 /** 687 * Same as {@link #parse(Object, Type, Type...)} but since it's a {@link String} input doesn't throw an {@link IOException}. 688 * 689 * @param <T> The class type of the object being created. 690 * @param input 691 * The input. 692 * See {@link #parse(Object, Type, Type...)} for details. 693 * @param type 694 * The object type to create. 
695 * <br>Can be any of the following: {@link ClassMeta}, {@link Class}, {@link ParameterizedType}, {@link GenericArrayType} 696 * @param args 697 * The type arguments of the class if it's a collection or map. 698 * <br>Can be any of the following: {@link ClassMeta}, {@link Class}, {@link ParameterizedType}, {@link GenericArrayType} 699 * <br>Ignored if the main type is not a map or collection. 700 * @return The parsed object. 701 * @throws ParseException Malformed input encountered. 702 */ 703 public final <T> T parse(String input, Type type, Type...args) throws ParseException { 704 return createSession().parse(input, type, args); 705 } 706 707 /** 708 * Same as {@link #parse(Object, Type, Type...)} except optimized for a non-parameterized class. 709 * 710 * <p> 711 * This is the preferred parse method for simple types since you don't need to cast the results. 712 * 713 * <h5 class='section'>Examples:</h5> 714 * <p class='bcode w800'> 715 * ReaderParser p = JsonParser.<jsf>DEFAULT</jsf>; 716 * 717 * <jc>// Parse into a string.</jc> 718 * String s = p.parse(json, String.<jk>class</jk>); 719 * 720 * <jc>// Parse into a bean.</jc> 721 * MyBean b = p.parse(json, MyBean.<jk>class</jk>); 722 * 723 * <jc>// Parse into a bean array.</jc> 724 * MyBean[] ba = p.parse(json, MyBean[].<jk>class</jk>); 725 * 726 * <jc>// Parse into a linked-list of objects.</jc> 727 * List l = p.parse(json, LinkedList.<jk>class</jk>); 728 * 729 * <jc>// Parse into a map of object keys/values.</jc> 730 * Map m = p.parse(json, TreeMap.<jk>class</jk>); 731 * </p> 732 * 733 * @param <T> The class type of the object being created. 734 * @param input 735 * The input. 736 * See {@link #parse(Object, Type, Type...)} for details. 737 * @param type The object type to create. 738 * @return The parsed object. 739 * @throws ParseException Malformed input encountered. 740 * @throws IOException Thrown by the underlying stream. 
741 */ 742 public final <T> T parse(Object input, Class<T> type) throws ParseException, IOException { 743 return createSession().parse(input, type); 744 } 745 746 /** 747 * Same as {@link #parse(Object, Class)} but since it's a {@link String} input doesn't throw an {@link IOException}. 748 * 749 * @param <T> The class type of the object being created. 750 * @param input 751 * The input. 752 * See {@link #parse(Object, Type, Type...)} for details. 753 * @param type The object type to create. 754 * @return The parsed object. 755 * @throws ParseException Malformed input encountered. 756 */ 757 public final <T> T parse(String input, Class<T> type) throws ParseException { 758 return createSession().parse(input, type); 759 } 760 761 /** 762 * Same as {@link #parse(Object, Type, Type...)} except the type has already been converted into a {@link ClassMeta} 763 * object. 764 * 765 * <p> 766 * This is mostly an internal method used by the framework. 767 * 768 * @param <T> The class type of the object being created. 769 * @param input 770 * The input. 771 * See {@link #parse(Object, Type, Type...)} for details. 772 * @param type The object type to create. 773 * @return The parsed object. 774 * @throws ParseException Malformed input encountered. 775 * @throws IOException Thrown by the underlying stream. 776 */ 777 public final <T> T parse(Object input, ClassMeta<T> type) throws ParseException, IOException { 778 return createSession().parse(input, type); 779 } 780 781 /** 782 * Same as {@link #parse(Object, ClassMeta)} but since it's a {@link String} input doesn't throw an {@link IOException}. 783 * 784 * @param <T> The class type of the object being created. 785 * @param input 786 * The input. 787 * See {@link #parse(Object, Type, Type...)} for details. 788 * @param type The object type to create. 789 * @return The parsed object. 790 * @throws ParseException Malformed input encountered. 
791 */ 792 public final <T> T parse(String input, ClassMeta<T> type) throws ParseException { 793 return createSession().parse(input, type); 794 } 795 796 @Override /* Context */ 797 public ParserSession createSession() { 798 return createSession(createDefaultSessionArgs()); 799 } 800 801 @Override /* Context */ 802 public final ParserSessionArgs createDefaultSessionArgs() { 803 return new ParserSessionArgs().mediaType(getPrimaryMediaType()); 804 } 805 806 //----------------------------------------------------------------------------------------------------------------- 807 // Optional methods 808 //----------------------------------------------------------------------------------------------------------------- 809 810 /** 811 * Parses the contents of the specified reader and loads the results into the specified map. 812 * 813 * <p> 814 * Reader must contain something that serializes to a map (such as text containing a JSON object). 815 * 816 * <p> 817 * Used in the following locations: 818 * <ul class='spaced-list'> 819 * <li> 820 * The various character-based constructors in {@link OMap} (e.g. 821 * {@link OMap#OMap(CharSequence,Parser)}). 822 * </ul> 823 * 824 * @param <K> The key class type. 825 * @param <V> The value class type. 826 * @param input The input. See {@link #parse(Object, ClassMeta)} for supported input types. 827 * @param m The map being loaded. 828 * @param keyType The class type of the keys, or <jk>null</jk> to default to <code>String.<jk>class</jk></code>. 829 * @param valueType The class type of the values, or <jk>null</jk> to default to whatever is being parsed. 830 * @return The same map that was passed in to allow this method to be chained. 831 * @throws ParseException Malformed input encountered. 832 * @throws UnsupportedOperationException If not implemented. 
833 */ 834 public final <K,V> Map<K,V> parseIntoMap(Object input, Map<K,V> m, Type keyType, Type valueType) throws ParseException { 835 return createSession().parseIntoMap(input, m, keyType, valueType); 836 } 837 838 /** 839 * Parses the contents of the specified reader and loads the results into the specified collection. 840 * 841 * <p> 842 * Used in the following locations: 843 * <ul class='spaced-list'> 844 * <li> 845 * The various character-based constructors in {@link OList} (e.g. 846 * {@link OList#OList(CharSequence,Parser)}. 847 * </ul> 848 * 849 * @param <E> The element class type. 850 * @param input The input. See {@link #parse(Object, ClassMeta)} for supported input types. 851 * @param c The collection being loaded. 852 * @param elementType The class type of the elements, or <jk>null</jk> to default to whatever is being parsed. 853 * @return The same collection that was passed in to allow this method to be chained. 854 * @throws ParseException Malformed input encountered. 855 * @throws UnsupportedOperationException If not implemented. 856 */ 857 public final <E> Collection<E> parseIntoCollection(Object input, Collection<E> c, Type elementType) throws ParseException { 858 return createSession().parseIntoCollection(input, c, elementType); 859 } 860 861 /** 862 * Parses the specified array input with each entry in the object defined by the {@code argTypes} 863 * argument. 864 * 865 * <p> 866 * Used for converting arrays (e.g. <js>"[arg1,arg2,...]"</js>) into an {@code Object[]} that can be passed 867 * to the {@code Method.invoke(target, args)} method. 868 * 869 * <p> 870 * Used in the following locations: 871 * <ul class='spaced-list'> 872 * <li> 873 * Used to parse argument strings in the {@link PojoIntrospector#invokeMethod(Method, Reader)} method. 874 * </ul> 875 * 876 * @param input The input. Subclasses can support different input types. 877 * @param argTypes Specifies the type of objects to create for each entry in the array. 
878 * @return An array of parsed objects. 879 * @throws ParseException Malformed input encountered. 880 */ 881 public final Object[] parseArgs(Object input, Type[] argTypes) throws ParseException { 882 if (argTypes == null || argTypes.length == 0) 883 return new Object[0]; 884 return createSession().parseArgs(input, argTypes); 885 } 886 887 888 //----------------------------------------------------------------------------------------------------------------- 889 // Other methods 890 //----------------------------------------------------------------------------------------------------------------- 891 892 /** 893 * Returns the media types handled based on the values passed to the <c>consumes</c> constructor parameter. 894 * 895 * @return The list of media types. Never <jk>null</jk>. 896 */ 897 public final MediaType[] getMediaTypes() { 898 return consumes; 899 } 900 901 /** 902 * Returns the first media type handled based on the values passed to the <c>consumes</c> constructor parameter. 903 * 904 * @return The media type. 905 */ 906 public final MediaType getPrimaryMediaType() { 907 return consumes == null || consumes.length == 0 ? null : consumes[0]; 908 } 909 910 /** 911 * Returns <jk>true</jk> if this parser can handle the specified content type. 912 * 913 * @param contentType The content type to test. 914 * @return <jk>true</jk> if this parser can handle the specified content type. 915 */ 916 public boolean canHandle(String contentType) { 917 if (contentType != null) 918 for (MediaType mt : getMediaTypes()) 919 if (contentType.equals(mt.toString())) 920 return true; 921 return false; 922 } 923 924 //----------------------------------------------------------------------------------------------------------------- 925 // Properties 926 //----------------------------------------------------------------------------------------------------------------- 927 928 /** 929 * Auto-close streams. 
930 * 931 * @see #PARSER_autoCloseStreams 932 * @return 933 * <jk>true</jk> if <l>InputStreams</l> and <l>Readers</l> passed into parsers will be closed 934 * after parsing is complete. 935 */ 936 protected final boolean isAutoCloseStreams() { 937 return autoCloseStreams; 938 } 939 940 /** 941 * Debug output lines. 942 * 943 * @see #PARSER_debugOutputLines 944 * @return 945 * The number of lines of input before and after the error location to be printed as part of the exception message. 946 */ 947 protected final int getDebugOutputLines() { 948 return debugOutputLines; 949 } 950 951 /** 952 * Parser listener. 953 * 954 * @see #PARSER_listener 955 * @return 956 * Class used to listen for errors and warnings that occur during parsing. 957 */ 958 protected final Class<? extends ParserListener> getListener() { 959 return listener; 960 } 961 962 /** 963 * Strict mode. 964 * 965 * @see #PARSER_strict 966 * @return 967 * <jk>true</jk> if strict mode for the parser is enabled. 968 */ 969 protected final boolean isStrict() { 970 return strict; 971 } 972 973 /** 974 * Trim parsed strings. 975 * 976 * @see #PARSER_trimStrings 977 * @return 978 * <jk>true</jk> if string values will be trimmed of whitespace using {@link String#trim()} before being added to 979 * the POJO. 980 */ 981 protected final boolean isTrimStrings() { 982 return trimStrings; 983 } 984 985 /** 986 * Unbuffered. 987 * 988 * @see #PARSER_unbuffered 989 * @return 990 * <jk>true</jk> if parsers don't use internal buffering during parsing. 
*/
	protected final boolean isUnbuffered() {
		return this.unbuffered;
	}

	//-----------------------------------------------------------------------------------------------------------------
	// Other methods
	//-----------------------------------------------------------------------------------------------------------------

	@Override /* Context */
	public OMap toMap() {
		// Collect this class's own settings first, then attach them to the parent's map under the "Parser" key.
		OMap parserSettings = new DefaultFilteringOMap()
			.a("autoCloseStreams", autoCloseStreams)
			.a("debugOutputLines", debugOutputLines)
			.a("listener", listener)
			.a("strict", strict)
			.a("trimStrings", trimStrings)
			.a("unbuffered", unbuffered);

		return super.toMap().a("Parser", parserSettings);
	}
}