/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.juneau.html;

import static javax.xml.stream.XMLStreamConstants.*;
import static org.apache.juneau.common.utils.StringUtils.*;
import static org.apache.juneau.common.utils.Utils.*;
import static org.apache.juneau.html.HtmlTag.*;

import java.io.*;
import java.lang.reflect.*;
import java.nio.charset.*;
import java.util.*;
import java.util.function.*;

import javax.xml.stream.*;

import org.apache.juneau.*;
import org.apache.juneau.collections.*;
import org.apache.juneau.common.utils.*;
import org.apache.juneau.html.annotation.*;
import org.apache.juneau.httppart.*;
import org.apache.juneau.internal.*;
import org.apache.juneau.parser.*;
import org.apache.juneau.swap.*;
import org.apache.juneau.xml.*;

/**
 * ContextSession object that lives for the duration of a single use of {@link HtmlParser}.
 *
 * <h5 class='section'>Notes:</h5><ul>
 * 	<li class='warn'>This class is not thread safe and is typically discarded after one use.
 * </ul>
 *
 * <h5 class='section'>See Also:</h5><ul>
 * 	<li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/HtmlBasics">HTML Basics</a>
 * </ul>
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public class HtmlParserSession extends XmlParserSession {

	//-------------------------------------------------------------------------------------------------------------------
	// Static
	//-------------------------------------------------------------------------------------------------------------------

	/* Local names of the elements that isWhitespaceElement(XmlReader) reports as whitespace elements. */
	private static final Set<String> whitespaceElements = set("br","bs","sp","ff");

	/**
	 * Creates a new builder for this object.
	 *
	 * @param ctx The context creating this session.
	 * @return A new builder.
	 */
	public static Builder create(HtmlParser ctx) {
		return new Builder(ctx);
	}

	//-------------------------------------------------------------------------------------------------------------------
	// Builder
	//-------------------------------------------------------------------------------------------------------------------

	/**
	 * Builder class.
	 *
	 * <p>
	 * Every setter simply delegates to the parent builder and returns <c>this</c> so that calls can be chained
	 * without losing the {@link HtmlParserSession.Builder} type.
	 */
	public static class Builder extends XmlParserSession.Builder {

		HtmlParser ctx;

		/**
		 * Constructor
		 *
		 * @param ctx The context creating this session.
		 */
		protected Builder(HtmlParser ctx) {
			super(ctx);
			this.ctx = ctx;
		}

		@Override
		public HtmlParserSession build() {
			return new HtmlParserSession(this);
		}

		@Override /* Overridden from Builder */
		public <T> Builder apply(Class<T> type, Consumer<T> apply) {
			super.apply(type, apply);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder debug(Boolean value) {
			super.debug(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder properties(Map<String,Object> value) {
			super.properties(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder property(String key, Object value) {
			super.property(key, value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder unmodifiable() {
			super.unmodifiable();
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder locale(Locale value) {
			super.locale(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder localeDefault(Locale value) {
			super.localeDefault(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder mediaType(MediaType value) {
			super.mediaType(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder mediaTypeDefault(MediaType value) {
			super.mediaTypeDefault(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder timeZone(TimeZone value) {
			super.timeZone(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder timeZoneDefault(TimeZone value) {
			super.timeZoneDefault(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder javaMethod(Method value) {
			super.javaMethod(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder outer(Object value) {
			super.outer(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder schema(HttpPartSchema value) {
			super.schema(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder schemaDefault(HttpPartSchema value) {
			super.schemaDefault(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder fileCharset(Charset value) {
			super.fileCharset(value);
			return this;
		}

		@Override /* Overridden from Builder */
		public Builder streamCharset(Charset value) {
			super.streamCharset(value);
			return this;
		}
	}

	//-------------------------------------------------------------------------------------------------------------------
	// Instance
	//-------------------------------------------------------------------------------------------------------------------

	private final HtmlParser ctx;

	/**
	 * Constructor.
	 *
	 * @param builder The builder for this object.
	 */
	protected HtmlParserSession(Builder builder) {
		super(builder);
		ctx = builder.ctx;
	}

	/*
	 * Parses the piped input as a root element.
	 * XMLStreamExceptions from the underlying reader are rethrown wrapped in a ParseException.
	 */
	@Override /* ParserSession */
	protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException {
		try {
			return parseAnything(type, getXmlReader(pipe), getOuter(), true, null);
		} catch (XMLStreamException e) {
			throw new ParseException(e);
		}
	}

	@Override /* ReaderParserSession */
	protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType)
			throws Exception {
		return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType),
			(ClassMeta<V>)getClassMeta(valueType), null);
	}

	@Override /* ReaderParserSession */
	protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType)
			throws Exception {
		return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null);
	}

	/*
	 * Reads anything starting at the current event.
	 * <p>
	 * Precondition: Must be pointing at outer START_ELEMENT.
	 * Postcondition: Pointing at outer END_ELEMENT.
	 *
	 * eType  - The expected type.  Defaults to the generic object type when null.
	 * r      - The stream being read from.
	 * outer  - The parent object passed to instance creation and setParent(); may be null.
	 * isRoot - When false, the reader is first advanced past the enclosing start element.
	 * pMeta  - The bean property being populated, or null if not parsing a bean property.
	 */
	private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {

		if (eType == null)
			eType = (ClassMeta<T>)object();
		ObjectSwap<T,Object> swap = (ObjectSwap<T,Object>)eType.getSwap(this);
		BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this);

		// sType is the type actually read off the wire: builder type, else swap type, else eType itself.
		ClassMeta<?> sType = null;
		if (builder != null)
			sType = builder.getBuilderClassMeta(this);
		else if (swap != null)
			sType = swap.getSwapClassMeta(this);
		else
			sType = eType;

		if (sType.isOptional())
			return (T)Utils.opt(parseAnything(eType.getElementType(), r, outer, isRoot, pMeta));

		setCurrentClass(sType);

		int event = r.getEventType();
		if (event != START_ELEMENT)
			throw new ParseException(this, "parseAnything must be called on outer start element.");

		if (! isRoot)
			event = r.next();
		boolean isEmpty = (event == END_ELEMENT);

		// Skip until we find a start element, end document, or non-empty text.
		if (! isEmpty)
			event = skipWs(r);

		if (event == END_DOCUMENT)
			throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType);

		// Handle @Html(asXml=true) beans.
		HtmlClassMeta hcm = getHtmlClassMeta(sType);
		if (hcm.getFormat() == HtmlFormat.XML)
			return super.parseAnything(eType, null, r, outer, false, pMeta);

		Object o = null;

		boolean isValid = true;
		HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));

		// If it's not a known tag, then parse it as XML.
		// Allows us to parse stuff like "<div/>" into HTML5 beans.
		if (tag == null && event != CHARACTERS)
			return super.parseAnything(eType, null, r, outer, false, pMeta);

		if (tag == HTML)
			tag = skipToData(r);

		if (isEmpty) {
			o = "";
		} else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) {
			String text = parseText(r);
			if (sType.isObject() || sType.isCharSequence())
				o = text;
			else if (sType.isChar())
				o = parseCharacter(text);
			else if (sType.isBoolean())
				o = Boolean.parseBoolean(text);
			else if (sType.isNumber())
				// Use the wire type (sType), matching the <number> branch below; eType may be a swapped non-numeric class.
				o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass());
			else if (sType.canCreateNewInstanceFromString(outer))
				o = sType.newInstanceFromString(outer, text);
			else
				isValid = false;

		} else if (tag == STRING || (tag == A && pMeta != null && getHtmlBeanPropertyMeta(pMeta).getLink() != null)) {
			String text = getElementText(r);
			if (sType.isObject() || sType.isCharSequence())
				o = text;
			else if (sType.isChar())
				o = parseCharacter(text);
			else if (sType.canCreateNewInstanceFromString(outer))
				o = sType.newInstanceFromString(outer, text);
			else
				isValid = false;
			skipTag(r, tag == STRING ? xSTRING : xA);

		} else if (tag == NUMBER) {
			String text = getElementText(r);
			if (sType.isObject())
				o = parseNumber(text, Number.class);
			else if (sType.isNumber())
				o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass());
			else
				isValid = false;
			skipTag(r, xNUMBER);

		} else if (tag == BOOLEAN) {
			String text = getElementText(r);
			if (sType.isObject() || sType.isBoolean())
				o = Boolean.parseBoolean(text);
			else
				isValid = false;
			skipTag(r, xBOOLEAN);

		} else if (tag == P) {
			// The only <p> content accepted here is the literal "No Results"; the result stays null.
			String text = getElementText(r);
			if (! "No Results".equals(text))
				isValid = false;
			skipTag(r, xP);

		} else if (tag == NULL) {
			skipTag(r, NULL);
			skipTag(r, xNULL);

		} else if (tag == A) {
			o = parseAnchor(r, swap == null ? eType : null);
			skipTag(r, xA);

		} else if (tag == TABLE) {

			String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object");
			ClassMeta cm = getClassMeta(typeName, pMeta, eType);

			if (cm != null) {
				sType = eType = cm;
				typeName = sType.isCollectionOrArray() ? "array" : "object";
			} else if (! "array".equals(typeName)) {
				// Type name could be a subtype name.
				typeName = sType.isCollectionOrArray() ? "array" : "object";
			}

			if (typeName.equals("object")) {
				if (sType.isObject()) {
					o = parseIntoMap(r, newGenericMap(sType), sType.getKeyType(), sType.getValueType(),
						pMeta);
				} else if (sType.isMap()) {
					o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer)
						: newGenericMap(sType)), sType.getKeyType(), sType.getValueType(), pMeta);
				} else if (builder != null) {
					BeanMap m = toBeanMap(builder.create(this, eType));
					o = builder.build(this, parseIntoBean(r, m).getBean(), eType);
				} else if (sType.canCreateNewBean(outer)) {
					BeanMap m = newBeanMap(outer, sType.getInnerClass());
					o = parseIntoBean(r, m).getBean();
				} else if (sType.getProxyInvocationHandler() != null) {
					BeanMap m = newBeanMap(outer, sType.getInnerClass());
					o = parseIntoBean(r, m).getBean();
				} else {
					isValid = false;
				}
				skipTag(r, xTABLE);

			} else if (typeName.equals("array")) {
				if (sType.isObject())
					o = parseTableIntoCollection(r, (Collection)new JsonList(this), sType, pMeta);
				else if (sType.isCollection())
					o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
						? sType.newInstance(outer) : new JsonList(this)), sType, pMeta);
				else if (sType.isArray() || sType.isArgs()) {
					ArrayList l = (ArrayList)parseTableIntoCollection(r, list(), sType, pMeta);
					o = toArray(sType, l);
				}
				else
					isValid = false;
				skipTag(r, xTABLE);

			} else {
				isValid = false;
			}

		} else if (tag == UL) {
			String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array");
			ClassMeta cm = getClassMeta(typeName, pMeta, eType);
			if (cm != null)
				sType = eType = cm;

			if (sType.isObject())
				o = parseIntoCollection(r, new JsonList(this), sType, pMeta);
			else if (sType.isCollection())
				o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
					? sType.newInstance(outer) : new JsonList(this)), sType, pMeta);
			else if (sType.isArray() || sType.isArgs())
				o = toArray(sType, parseIntoCollection(r, list(), sType, pMeta));
			else
				isValid = false;
			skipTag(r, xUL);

		}

		if (! isValid)
			throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType);

		if (swap != null && o != null)
			o = unswap(swap, o, eType);

		if (outer != null)
			setParent(eType, o, outer);

		skipWs(r);
		return (T)o;
	}

	/*
	 * For parsing output from HtmlDocSerializer, this skips over the head, title, and links.
	 * <p>
	 * Advances until a <div id='data'> start element is found, then returns the tag of the first
	 * non-whitespace event inside it (null if that event is character data).
	 * Throws a ParseException if the document ends before the data section is found.
	 */
	private HtmlTag skipToData(XmlReader r) throws ParseException, XMLStreamException {
		while (true) {
			int event = r.next();
			// Fail cleanly instead of reading past the end of the stream when there is no data section.
			if (event == END_DOCUMENT)
				throw new ParseException(this, "Unexpected end of stream looking for data.");
			if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) {
				r.nextTag();
				event = r.getEventType();
				boolean isEmpty = (event == END_ELEMENT);
				// Skip until we find a start element, end document, or non-empty text.
				if (! isEmpty)
					event = skipWs(r);
				if (event == END_DOCUMENT)
					throw new ParseException(this, "Unexpected end of stream looking for data.");
				return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
			}
		}
	}

	/*
	 * Returns the value of the attribute with the specified local name on the current element,
	 * or the specified default if no such attribute exists.
	 */
	private static String getAttribute(XmlReader r, String name, String def) {
		for (int i = 0; i < r.getAttributeCount(); i++)
			if (r.getAttributeLocalName(i).equals(name))
				return r.getAttributeValue(i);
		return def;
	}

	/*
	 * Reads an anchor tag and converts it into a bean.
	 * <p>
	 * If the bean type is annotated with @HtmlLink, the href and the element text are stored in the
	 * properties named by the annotation.  Otherwise the href alone is converted to the requested type.
	 */
	private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType)
			throws IOException, ParseException, XMLStreamException {
		String href = r.getAttributeValue(null, "href");
		String name = getElementText(r);
		if (beanType != null && beanType.hasAnnotation(HtmlLink.class)) {
			Value<String> uriProperty = Value.empty(), nameProperty = Value.empty();
			beanType.forEachAnnotation(HtmlLink.class, x -> isNotEmpty(x.uriProperty()), x -> uriProperty.set(x.uriProperty()));
			beanType.forEachAnnotation(HtmlLink.class, x -> isNotEmpty(x.nameProperty()), x -> nameProperty.set(x.nameProperty()));
			BeanMap<T> m = newBeanMap(beanType.getInnerClass());
			m.put(uriProperty.orElse(""), href);
			m.put(nameProperty.orElse(""), name);
			return m.getBean();
		}
		return convertToType(href, beanType);
	}

	/*
	 * Returns all attributes on the current element as a map of local name to value, sorted by name.
	 */
	private static Map<String,String> getAttributes(XmlReader r) {
		Map<String,String> m = new TreeMap<>();
		for (int i = 0; i < r.getAttributeCount(); i++)
			m.put(r.getAttributeLocalName(i), r.getAttributeValue(i));
		return m;
	}

	/*
	 * Reads contents of <table> element into a map, one key/value row at a time.
	 * Rows whose first cell is a <th> are treated as column headers and skipped.
	 * Precondition: Must be pointing at <table> event.
	 * Postcondition: Pointing at the closing </table> tag.
	 */
	private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType,
			ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
		while (true) {
			HtmlTag tag = nextTag(r, TR, xTABLE);
			if (tag == xTABLE)
				break;
			tag = nextTag(r, TD, TH);
			// Skip over the column headers.
			if (tag == TH) {
				skipTag(r);
				r.nextTag();
				skipTag(r);
			} else {
				K key = parseAnything(keyType, r, m, false, pMeta);
				nextTag(r, TD);
				V value = parseAnything(valueType, r, m, false, pMeta);
				setName(valueType, value, key);
				m.put(key, value);
			}
			tag = nextTag(r, xTD, xTR);
			if (tag == xTD)
				nextTag(r, xTR);
		}

		return m;
	}

	/*
	 * Reads contents of <ul> element into a collection, one <li> per entry.
	 * Precondition: Must be pointing at <ul> event.
	 * Postcondition: Pointing at the closing </ul> tag.
	 */
	private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l,
			ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
		int argIndex = 0;
		while (true) {
			HtmlTag tag = nextTag(r, LI, xUL, xLI);
			if (tag == xLI)
				tag = nextTag(r, LI, xUL, xLI);
			if (tag == xUL)
				break;
			ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
			l.add((E)parseAnything(elementType, r, l, false, pMeta));
		}
		return l;
	}

	/*
	 * Reads contents of a <table> element whose first row is a header row of <th> column names
	 * and whose remaining rows each become one entry in the collection.
	 * Precondition: Must be pointing at <table> event.
	 * Postcondition: Pointing at the closing </table> tag.
	 */
	private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l,
			ClassMeta<E> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {

		// Header row:  collect the column names.
		HtmlTag tag = nextTag(r, TR);
		List<String> keys = list();
		while (true) {
			tag = nextTag(r, TH, xTR);
			if (tag == xTR)
				break;
			keys.add(getElementText(r));
		}

		int argIndex = 0;

		// Data rows.
		while (true) {
			r.nextTag();
			tag = HtmlTag.forEvent(this, r);
			if (tag == xTABLE)
				break;

			// A row may name its own type via the bean-type attribute; otherwise fall back to the declared element type.
			ClassMeta elementType = null;
			String beanType = getAttribute(r, getBeanTypePropertyName(type), null);
			if (beanType != null)
				elementType = getClassMeta(beanType, pMeta, null);
			if (elementType == null)
				elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
			if (elementType == null)
				elementType = object();

			BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this);

			if (builder != null || elementType.canCreateNewBean(l)) {
				BeanMap m =
					builder != null
					? toBeanMap(builder.create(this, elementType))
					: newBeanMap(l, elementType.getInnerClass())
				;
				for (String key : keys) {
					tag = nextTag(r, xTD, TD, NULL);
					if (tag == xTD)
						tag = nextTag(r, TD, NULL);
					if (tag == NULL) {
						// A <null> in place of a cell means the whole row is a null entry.
						m = null;
						nextTag(r, xNULL);
						break;
					}
					BeanMapEntry e = m.getProperty(key);
					if (e == null) {
						// Unknown column:  parse and discard the cell value.
						parseAnything(object(), r, l, false, null);
					} else {
						BeanPropertyMeta bpm = e.getMeta();
						ClassMeta<?> cm = bpm.getClassMeta();
						Object value = parseAnything(cm, r, m.getBean(false), false, bpm);
						setName(cm, value, key);
						bpm.set(m, key, value);
					}
				}
				l.add(
					m == null
					? null
					: builder != null
						? builder.build(this, m.getBean(), elementType)
						: (E)m.getBean()
				);
			} else {
				String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType()));
				Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l)
					: newGenericMap(elementType));
				for (String key : keys) {
					tag = nextTag(r, TD, NULL);
					if (tag == NULL) {
						// A <null> in place of a cell means the whole row is a null entry.
						m = null;
						nextTag(r, xNULL);
						break;
					}
					if (m != null) {
						ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType();
						Object value = parseAnything(vt, r, l, false, pMeta);
						setName(vt, value, key);
						m.put(convertToType(key, kt), value);
					}
				}
				if (m != null && c != null) {
					// Carry the row's type name into the map so that cast() can resolve the target type.
					JsonMap m2 = (m instanceof JsonMap ? (JsonMap)m : new JsonMap(m).session(this));
					m2.put(getBeanTypePropertyName(type.getElementType()), c);
					l.add((E)cast(m2, pMeta, elementType));
				} else {
					if (m instanceof JsonMap)
						l.add((E)convertToType(m, elementType));
					else
						l.add((E)m);
				}
			}
			nextTag(r, xTR);
		}
		return l;
	}

	/*
	 * Reads contents of <table> element into a bean, one property-name/value row at a time.
	 * Rows whose first cell is a <th> are treated as column headers and skipped.
	 * Precondition: Must be pointing at <table> event.
	 * Postcondition: Pointing at the closing </table> tag.
	 */
	private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws IOException, ParseException, ExecutableException, XMLStreamException {
		while (true) {
			HtmlTag tag = nextTag(r, TR, xTABLE);
			if (tag == xTABLE)
				break;
			tag = nextTag(r, TD, TH);
			// Skip over the column headers.
			if (tag == TH) {
				skipTag(r);
				r.nextTag();
				skipTag(r);
			} else {
				String key = getElementText(r);
				nextTag(r, TD);
				BeanPropertyMeta pMeta = m.getPropertyMeta(key);
				if (pMeta == null) {
					onUnknownProperty(key, m, parseAnything(object(), r, null, false, null));
				} else {
					ClassMeta<?> cm = pMeta.getClassMeta();
					Object value = parseAnything(cm, r, m.getBean(false), false, pMeta);
					setName(cm, value, key);
					try {
						pMeta.set(m, key, value);
					} catch (BeanRuntimeException e) {
						onBeanSetterException(pMeta, e);
						throw e;
					}
				}
			}
			HtmlTag t = nextTag(r, xTD, xTR);
			if (t == xTD)
				nextTag(r, xTR);
		}
		return m;
	}

	/*
	 * Reads the next tag.  Advances past anything that's not a start or end tag.  Throws an exception if
	 * it's not one of the expected tags (any tag is accepted when no expected tags are specified).
	 * Precondition: Must be pointing before the event we want to parse.
	 * Postcondition: Pointing at the tag just parsed.
	 */
	private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
		int et = r.next();

		while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT)
			et = r.next();

		if (et == END_DOCUMENT)
			throw new ParseException(this, "Unexpected end of document.");

		HtmlTag tag = HtmlTag.forEvent(this, r);
		if (expected.length == 0)
			return tag;
		for (HtmlTag t : expected)
			if (t == tag)
				return tag;

		throw new ParseException(this, "Unexpected tag: ''{0}''. Expected one of the following: {1}", tag, expected);
	}

	/*
	 * Skips over the contents of the current element, including nested elements of the same name.
	 * <p>
	 * Precondition: Pointing to opening tag.
	 * Postcondition: Pointing to the matching closing tag.
	 *
	 * @param r The stream being read from.
	 * @throws XMLStreamException
	 */
	private void skipTag(XmlReader r) throws ParseException, XMLStreamException {
		int et = r.getEventType();

		if (et != START_ELEMENT)
			throw new ParseException(this,
				"skipToNextTag() call on invalid event ''{0}''. Must only be called on START_ELEMENT events.",
				XmlUtils.toReadableEvent(r)
			);

		String n = r.getLocalName();

		// Tracks nesting of same-named elements so that we stop on the close tag matching the original open tag.
		int depth = 0;
		while (true) {
			et = r.next();
			if (et == START_ELEMENT) {
				String n2 = r.getLocalName();
				if (n.equals(n2))
					depth++;
			} else if (et == END_ELEMENT) {
				String n2 = r.getLocalName();
				if (n.equals(n2))
					depth--;
				if (depth < 0)
					return;
			}
		}
	}

	/*
	 * Verifies that the current tag is one of the expected tags and then advances one event past it.
	 * Throws a ParseException if the current tag is not one of the expected tags.
	 */
	private void skipTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
		HtmlTag tag = HtmlTag.forEvent(this, r);
		if (tag.isOneOf(expected))
			r.next();
		else
			throw new ParseException(this,
				"Unexpected tag: ''{0}''. Expected one of the following: {1}",
				tag, expected);
	}

	/*
	 * Advances past whitespace events.
	 * Stops at the first start element, end element, end of document, or non-whitespace event,
	 * and returns the type of the event stopped on.
	 */
	private static int skipWs(XmlReader r) throws XMLStreamException {
		int event = r.getEventType();
		while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace())
			event = r.next();
		return event;
	}

	/**
	 * Parses CHARACTERS data.
	 *
	 * <p>
	 * Precondition: Pointing to event immediately following opening tag.
	 * Postcondition: Pointing to closing tag.
	 *
	 * @param r The stream being read from.
	 * @return The parsed string.
	 * @throws IOException Thrown by underlying stream.
	 * @throws ParseException Malformed input encountered.
	 * @throws XMLStreamException Thrown by underlying XML stream.
	 */
	@Override /* XmlParserSession */
	protected String parseText(XmlReader r) throws IOException, ParseException, XMLStreamException {

		int et = r.getEventType();
		if (et == END_ELEMENT)
			return "";

		// Acquired only after the early return above so that every acquired builder is handed back below.
		StringBuilder sb = getStringBuilder();

		// Number of unrecognized elements currently open; these are echoed into the text as markup.
		int depth = 0;

		// Most recent CHARACTERS text, held back until the next tag so that it can be trimmed appropriately.
		String characters = null;

		while (true) {
			if (et == START_ELEMENT) {
				if (characters != null) {
					if (sb.length() == 0)
						characters = trimStart(characters);
					sb.append(characters);
					characters = null;
				}
				HtmlTag tag = HtmlTag.forEvent(this, r);
				if (tag == BR) {
					sb.append('\n');
					r.nextTag();
				} else if (tag == BS) {
					sb.append('\b');
					r.nextTag();
				} else if (tag == SP) {
					et = r.next();
					if (et == CHARACTERS) {
						String s = r.getText();
						if (isNotEmpty(s)) {
							char c = s.charAt(0);
							if (c == '\u2003')
								c = '\t';
							sb.append(c);
						}
						r.nextTag();
					}
				} else if (tag == FF) {
					sb.append('\f');
					r.nextTag();
				} else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) {
					et = r.next();
					if (et == CHARACTERS) {
						sb.append(r.getText());
						r.nextTag();
					}
				} else {
					sb.append('<').append(r.getLocalName());
					for (int i = 0; i < r.getAttributeCount(); i++)
						sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\'');
					sb.append('>');
					depth++;
				}
			} else if (et == END_ELEMENT) {
				if (characters != null) {
					if (sb.length() == 0)
						characters = trimStart(characters);
					if (depth == 0)
						characters = trimEnd(characters);
					sb.append(characters);
					characters = null;
				}
				if (depth == 0)
					break;
				sb.append('<').append(r.getLocalName()).append('>');
				depth--;
			} else if (et == CHARACTERS) {
				characters = r.getText();
			}
			et = r.next();
		}

		String s = trim(sb.toString());
		returnStringBuilder(sb);
		return s;
	}

	/**
	 * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag.
	 *
	 * <p>
	 * Precondition: Pointing to opening tag.
	 * Postcondition: Pointing to closing tag.
	 *
	 * @param r The stream being read from.
	 * @return The parsed string.
	 * @throws IOException Thrown by underlying stream.
	 * @throws XMLStreamException Thrown by underlying XML stream.
	 * @throws ParseException Malformed input encountered.
	 */
	@Override /* XmlParserSession */
	protected String getElementText(XmlReader r) throws IOException, XMLStreamException, ParseException {
		r.next();
		return parseText(r);
	}

	/*
	 * Returns true if the local name of the current element is one of: br, bs, sp, ff.
	 */
	@Override /* XmlParserSession */
	protected boolean isWhitespaceElement(XmlReader r) {
		String s = r.getLocalName();
		return whitespaceElements.contains(s);
	}

	/*
	 * Converts the current whitespace element into the text it represents:
	 * <br> is a newline, <bs> a backspace, <ff> a form feed, and <sp> its own character content
	 * (with a leading em-space interpreted as a tab).
	 * The reader is advanced one event past the opening tag, and one more when <sp> content is consumed.
	 */
	@Override /* XmlParserSession */
	protected String parseWhitespaceElement(XmlReader r) throws IOException, ParseException, XMLStreamException {

		HtmlTag tag = HtmlTag.forEvent(this, r);
		int et = r.next();
		if (tag == BR) {
			return "\n";
		} else if (tag == BS) {
			return "\b";
		} else if (tag == FF) {
			return "\f";
		} else if (tag == SP) {
			if (et == CHARACTERS) {
				String s = r.getText();
				// Guard against empty character data, mirroring the <sp> handling in parseText().
				if (isNotEmpty(s) && s.charAt(0) == '\u2003')
					s = "\t";
				r.next();
				return decodeString(s);
			}
			return "";
		} else {
			throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag);
		}
	}

	//-----------------------------------------------------------------------------------------------------------------
	// Extended metadata
	//-----------------------------------------------------------------------------------------------------------------

	/**
	 * Returns the language-specific metadata on the specified class.
	 *
	 * @param cm The class to return the metadata on.
	 * @return The metadata.
	 */
	protected HtmlClassMeta getHtmlClassMeta(ClassMeta<?> cm) {
		return ctx.getHtmlClassMeta(cm);
	}

	/**
	 * Returns the language-specific metadata on the specified bean property.
	 *
	 * @param bpm The bean property to return the metadata on.
	 * @return The metadata.
	 */
	protected HtmlBeanPropertyMeta getHtmlBeanPropertyMeta(BeanPropertyMeta bpm) {
		return ctx.getHtmlBeanPropertyMeta(bpm);
	}
}