001// *************************************************************************************************************************** 002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file * 003// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * 004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance * 005// * with the License. You may obtain a copy of the License at * 006// * * 007// * http://www.apache.org/licenses/LICENSE-2.0 * 008// * * 009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an * 010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * 011// * specific language governing permissions and limitations under the License. * 012// *************************************************************************************************************************** 013package org.apache.juneau.html; 014 015import static javax.xml.stream.XMLStreamConstants.*; 016import static org.apache.juneau.html.HtmlTag.*; 017import static org.apache.juneau.internal.StringUtils.*; 018 019import java.io.IOException; 020import java.lang.reflect.*; 021import java.util.*; 022 023import javax.xml.stream.*; 024 025import org.apache.juneau.*; 026import org.apache.juneau.collections.*; 027import org.apache.juneau.html.annotation.*; 028import org.apache.juneau.parser.*; 029import org.apache.juneau.transform.*; 030import org.apache.juneau.xml.*; 031 032/** 033 * Session object that lives for the duration of a single use of {@link HtmlParser}. 034 * 035 * <p> 036 * This class is NOT thread safe. 037 * It is typically discarded after one-time use although it can be reused against multiple inputs. 038 */ 039@SuppressWarnings({ "unchecked", "rawtypes" }) 040public final class HtmlParserSession extends XmlParserSession { 041 042 private static final Set<String> whitespaceElements = ASet.of("br","bs","sp","ff"); 043 044 private final HtmlParser ctx; 045 046 /** 047 * Create a new session using properties specified in the context. 048 * 049 * @param ctx 050 * The context creating this session object. 051 * The context contains all the configuration settings for this object. 052 * @param args 053 * Runtime session arguments. 054 */ 055 protected HtmlParserSession(HtmlParser ctx, ParserSessionArgs args) { 056 super(ctx, args); 057 this.ctx = ctx; 058 } 059 060 @Override /* ParserSession */ 061 protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException { 062 try { 063 return parseAnything(type, getXmlReader(pipe), getOuter(), true, null); 064 } catch (XMLStreamException e) { 065 throw new ParseException(e); 066 } 067 } 068 069 @Override /* ReaderParserSession */ 070 protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType) 071 throws Exception { 072 return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType), 073 (ClassMeta<V>)getClassMeta(valueType), null); 074 } 075 076 @Override /* ReaderParserSession */ 077 protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType) 078 throws Exception { 079 return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null); 080 } 081 082 /* 083 * Reads anything starting at the current event. 084 * <p> 085 * Precondition: Must be pointing at outer START_ELEMENT. 086 * Postcondition: Pointing at outer END_ELEMENT. 087 */ 088 private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 089 090 if (eType == null) 091 eType = (ClassMeta<T>)object(); 092 PojoSwap<T,Object> swap = (PojoSwap<T,Object>)eType.getSwap(this); 093 BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this); 094 ClassMeta<?> sType = null; 095 if (builder != null) 096 sType = builder.getBuilderClassMeta(this); 097 else if (swap != null) 098 sType = swap.getSwapClassMeta(this); 099 else 100 sType = eType; 101 102 if (sType.isOptional()) 103 return (T)Optional.ofNullable(parseAnything(eType.getElementType(), r, outer, isRoot, pMeta)); 104 105 setCurrentClass(sType); 106 107 int event = r.getEventType(); 108 if (event != START_ELEMENT) 109 throw new ParseException(this, "parseAnything must be called on outer start element."); 110 111 if (! isRoot) 112 event = r.next(); 113 boolean isEmpty = (event == END_ELEMENT); 114 115 // Skip until we find a start element, end document, or non-empty text. 116 if (! isEmpty) 117 event = skipWs(r); 118 119 if (event == END_DOCUMENT) 120 throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType); 121 122 // Handle @Html(asXml=true) beans. 123 HtmlClassMeta hcm = getHtmlClassMeta(sType); 124 if (hcm.getFormat() == HtmlFormat.XML) 125 return super.parseAnything(eType, null, r, outer, false, pMeta); 126 127 Object o = null; 128 129 boolean isValid = true; 130 HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false)); 131 132 // If it's not a known tag, then parse it as XML. 133 // Allows us to parse stuff like "<div/>" into HTML5 beans. 134 if (tag == null && event != CHARACTERS) 135 return super.parseAnything(eType, null, r, outer, false, pMeta); 136 137 if (tag == HTML) 138 tag = skipToData(r); 139 140 if (isEmpty) { 141 o = ""; 142 } else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) { 143 String text = parseText(r); 144 if (sType.isObject() || sType.isCharSequence()) 145 o = text; 146 else if (sType.isChar()) 147 o = parseCharacter(text); 148 else if (sType.isBoolean()) 149 o = Boolean.parseBoolean(text); 150 else if (sType.isNumber()) 151 o = parseNumber(text, (Class<? extends Number>)eType.getInnerClass()); 152 else if (sType.canCreateNewInstanceFromString(outer)) 153 o = sType.newInstanceFromString(outer, text); 154 else 155 isValid = false; 156 157 } else if (tag == STRING || (tag == A && pMeta != null && getHtmlBeanPropertyMeta(pMeta).getLink() != null)) { 158 String text = getElementText(r); 159 if (sType.isObject() || sType.isCharSequence()) 160 o = text; 161 else if (sType.isChar()) 162 o = parseCharacter(text); 163 else if (sType.canCreateNewInstanceFromString(outer)) 164 o = sType.newInstanceFromString(outer, text); 165 else 166 isValid = false; 167 skipTag(r, tag == STRING ? xSTRING : xA); 168 169 } else if (tag == NUMBER) { 170 String text = getElementText(r); 171 if (sType.isObject()) 172 o = parseNumber(text, Number.class); 173 else if (sType.isNumber()) 174 o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass()); 175 else 176 isValid = false; 177 skipTag(r, xNUMBER); 178 179 } else if (tag == BOOLEAN) { 180 String text = getElementText(r); 181 if (sType.isObject() || sType.isBoolean()) 182 o = Boolean.parseBoolean(text); 183 else 184 isValid = false; 185 skipTag(r, xBOOLEAN); 186 187 } else if (tag == P) { 188 String text = getElementText(r); 189 if (! "No Results".equals(text)) 190 isValid = false; 191 skipTag(r, xP); 192 193 } else if (tag == NULL) { 194 skipTag(r, NULL); 195 skipTag(r, xNULL); 196 197 } else if (tag == A) { 198 o = parseAnchor(r, eType); 199 skipTag(r, xA); 200 201 } else if (tag == TABLE) { 202 203 String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object"); 204 ClassMeta cm = getClassMeta(typeName, pMeta, eType); 205 206 if (cm != null) { 207 sType = eType = cm; 208 typeName = sType.isCollectionOrArray() ? "array" : "object"; 209 } else if (! "array".equals(typeName)) { 210 // Type name could be a subtype name. 211 typeName = sType.isCollectionOrArray() ? "array" : "object"; 212 } 213 214 if (typeName.equals("object")) { 215 if (sType.isObject()) { 216 o = parseIntoMap(r, (Map)new OMap(this), sType.getKeyType(), sType.getValueType(), 217 pMeta); 218 } else if (sType.isMap()) { 219 o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer) 220 : new OMap(this)), sType.getKeyType(), sType.getValueType(), pMeta); 221 } else if (builder != null) { 222 BeanMap m = toBeanMap(builder.create(this, eType)); 223 o = builder.build(this, parseIntoBean(r, m).getBean(), eType); 224 } else if (sType.canCreateNewBean(outer)) { 225 BeanMap m = newBeanMap(outer, sType.getInnerClass()); 226 o = parseIntoBean(r, m).getBean(); 227 } else if (sType.getProxyInvocationHandler() != null) { 228 BeanMap m = newBeanMap(outer, sType.getInnerClass()); 229 o = parseIntoBean(r, m).getBean(); 230 } else { 231 isValid = false; 232 } 233 skipTag(r, xTABLE); 234 235 } else if (typeName.equals("array")) { 236 if (sType.isObject()) 237 o = parseTableIntoCollection(r, (Collection)new OList(this), sType, pMeta); 238 else if (sType.isCollection()) 239 o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) 240 ? sType.newInstance(outer) : new OList(this)), sType, pMeta); 241 else if (sType.isArray() || sType.isArgs()) { 242 ArrayList l = (ArrayList)parseTableIntoCollection(r, new ArrayList(), sType, pMeta); 243 o = toArray(sType, l); 244 } 245 else 246 isValid = false; 247 skipTag(r, xTABLE); 248 249 } else { 250 isValid = false; 251 } 252 253 } else if (tag == UL) { 254 String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array"); 255 ClassMeta cm = getClassMeta(typeName, pMeta, eType); 256 if (cm != null) 257 sType = eType = cm; 258 259 if (sType.isObject()) 260 o = parseIntoCollection(r, new OList(this), sType, pMeta); 261 else if (sType.isCollection() || sType.isObject()) 262 o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) 263 ? sType.newInstance(outer) : new OList(this)), sType, pMeta); 264 else if (sType.isArray() || sType.isArgs()) 265 o = toArray(sType, parseIntoCollection(r, new ArrayList(), sType, pMeta)); 266 else 267 isValid = false; 268 skipTag(r, xUL); 269 270 } 271 272 if (! isValid) 273 throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType); 274 275 if (swap != null && o != null) 276 o = unswap(swap, o, eType); 277 278 if (outer != null) 279 setParent(eType, o, outer); 280 281 skipWs(r); 282 return (T)o; 283 } 284 285 /* 286 * For parsing output from HtmlDocSerializer, this skips over the head, title, and links. 287 */ 288 private HtmlTag skipToData(XmlReader r) throws ParseException, XMLStreamException { 289 while (true) { 290 int event = r.next(); 291 if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) { 292 r.nextTag(); 293 event = r.getEventType(); 294 boolean isEmpty = (event == END_ELEMENT); 295 // Skip until we find a start element, end document, or non-empty text. 296 if (! isEmpty) 297 event = skipWs(r); 298 if (event == END_DOCUMENT) 299 throw new ParseException(this, "Unexpected end of stream looking for data."); 300 return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false)); 301 } 302 } 303 } 304 305 private static String getAttribute(XmlReader r, String name, String def) { 306 for (int i = 0; i < r.getAttributeCount(); i++) 307 if (r.getAttributeLocalName(i).equals(name)) 308 return r.getAttributeValue(i); 309 return def; 310 } 311 312 /* 313 * Reads an anchor tag and converts it into a bean. 314 */ 315 private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType) 316 throws IOException, ParseException, XMLStreamException { 317 String href = r.getAttributeValue(null, "href"); 318 String name = getElementText(r); 319 if (beanType.hasAnnotation(HtmlLink.class)) { 320 String uriProperty = "", nameProperty = ""; 321 for (HtmlLink a : beanType.getAnnotations(HtmlLink.class)) { 322 if (! a.uriProperty().isEmpty()) 323 uriProperty = a.uriProperty(); 324 if (! a.nameProperty().isEmpty()) 325 nameProperty = a.nameProperty(); 326 } 327 BeanMap<T> m = newBeanMap(beanType.getInnerClass()); 328 m.put(uriProperty, href); 329 m.put(nameProperty, name); 330 return m.getBean(); 331 } 332 return convertToType(href, beanType); 333 } 334 335 private static Map<String,String> getAttributes(XmlReader r) { 336 Map<String,String> m = new TreeMap<>() ; 337 for (int i = 0; i < r.getAttributeCount(); i++) 338 m.put(r.getAttributeLocalName(i), r.getAttributeValue(i)); 339 return m; 340 } 341 342 /* 343 * Reads contents of <table> element. 344 * Precondition: Must be pointing at <table> event. 345 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 346 */ 347 private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType, 348 ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 349 while (true) { 350 HtmlTag tag = nextTag(r, TR, xTABLE); 351 if (tag == xTABLE) 352 break; 353 tag = nextTag(r, TD, TH); 354 // Skip over the column headers. 355 if (tag == TH) { 356 skipTag(r); 357 r.nextTag(); 358 skipTag(r); 359 } else { 360 K key = parseAnything(keyType, r, m, false, pMeta); 361 nextTag(r, TD); 362 V value = parseAnything(valueType, r, m, false, pMeta); 363 setName(valueType, value, key); 364 m.put(key, value); 365 } 366 tag = nextTag(r, xTD, xTR); 367 if (tag == xTD) 368 nextTag(r, xTR); 369 } 370 371 return m; 372 } 373 374 /* 375 * Reads contents of <ul> element. 376 * Precondition: Must be pointing at event following <ul> event. 377 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 378 */ 379 private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l, 380 ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 381 int argIndex = 0; 382 while (true) { 383 HtmlTag tag = nextTag(r, LI, xUL, xLI); 384 if (tag == xLI) 385 tag = nextTag(r, LI, xUL, xLI); 386 if (tag == xUL) 387 break; 388 ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType(); 389 l.add((E)parseAnything(elementType, r, l, false, pMeta)); 390 } 391 return l; 392 } 393 394 /* 395 * Reads contents of <ul> element. 396 * Precondition: Must be pointing at event following <ul> event. 397 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 398 */ 399 private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l, 400 ClassMeta<E> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 401 402 HtmlTag tag = nextTag(r, TR); 403 List<String> keys = new ArrayList<>(); 404 while (true) { 405 tag = nextTag(r, TH, xTR); 406 if (tag == xTR) 407 break; 408 keys.add(getElementText(r)); 409 } 410 411 int argIndex = 0; 412 413 while (true) { 414 r.nextTag(); 415 tag = HtmlTag.forEvent(this, r); 416 if (tag == xTABLE) 417 break; 418 419 ClassMeta elementType = null; 420 String beanType = getAttribute(r, getBeanTypePropertyName(type), null); 421 if (beanType != null) 422 elementType = getClassMeta(beanType, pMeta, null); 423 if (elementType == null) 424 elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType(); 425 if (elementType == null) 426 elementType = object(); 427 428 BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this); 429 430 if (builder != null || elementType.canCreateNewBean(l)) { 431 BeanMap m = 432 builder != null 433 ? toBeanMap(builder.create(this, elementType)) 434 : newBeanMap(l, elementType.getInnerClass()) 435 ; 436 for (int i = 0; i < keys.size(); i++) { 437 tag = nextTag(r, xTD, TD, NULL); 438 if (tag == xTD) 439 tag = nextTag(r, TD, NULL); 440 if (tag == NULL) { 441 m = null; 442 nextTag(r, xNULL); 443 break; 444 } 445 String key = keys.get(i); 446 BeanMapEntry e = m.getProperty(key); 447 if (e == null) { 448 //onUnknownProperty(key, m, -1, -1); 449 parseAnything(object(), r, l, false, null); 450 } else { 451 BeanPropertyMeta bpm = e.getMeta(); 452 ClassMeta<?> cm = bpm.getClassMeta(); 453 Object value = parseAnything(cm, r, m.getBean(false), false, bpm); 454 setName(cm, value, key); 455 bpm.set(m, key, value); 456 } 457 } 458 l.add( 459 m == null 460 ? null 461 : builder != null 462 ? builder.build(this, m.getBean(), elementType) 463 : (E)m.getBean() 464 ); 465 } else { 466 String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType())); 467 Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l) 468 : new OMap(this)); 469 for (int i = 0; i < keys.size(); i++) { 470 tag = nextTag(r, TD, NULL); 471 if (tag == NULL) { 472 m = null; 473 nextTag(r, xNULL); 474 break; 475 } 476 String key = keys.get(i); 477 if (m != null) { 478 ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType(); 479 Object value = parseAnything(vt, r, l, false, pMeta); 480 setName(vt, value, key); 481 m.put(convertToType(key, kt), value); 482 } 483 } 484 if (m != null && c != null) { 485 OMap m2 = (m instanceof OMap ? (OMap)m : new OMap(m).session(this)); 486 m2.put(getBeanTypePropertyName(type.getElementType()), c); 487 l.add((E)cast(m2, pMeta, elementType)); 488 } else { 489 if (m instanceof OMap) 490 l.add((E)convertToType(m, elementType)); 491 else 492 l.add((E)m); 493 } 494 } 495 nextTag(r, xTR); 496 } 497 return l; 498 } 499 500 /* 501 * Reads contents of <table> element. 502 * Precondition: Must be pointing at event following <table> event. 503 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 504 */ 505 private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws IOException, ParseException, ExecutableException, XMLStreamException { 506 while (true) { 507 HtmlTag tag = nextTag(r, TR, xTABLE); 508 if (tag == xTABLE) 509 break; 510 tag = nextTag(r, TD, TH); 511 // Skip over the column headers. 512 if (tag == TH) { 513 skipTag(r); 514 r.nextTag(); 515 skipTag(r); 516 } else { 517 String key = getElementText(r); 518 nextTag(r, TD); 519 BeanPropertyMeta pMeta = m.getPropertyMeta(key); 520 if (pMeta == null) { 521 onUnknownProperty(key, m, parseAnything(object(), r, null, false, null)); 522 } else { 523 ClassMeta<?> cm = pMeta.getClassMeta(); 524 Object value = parseAnything(cm, r, m.getBean(false), false, pMeta); 525 setName(cm, value, key); 526 try { 527 pMeta.set(m, key, value); 528 } catch (BeanRuntimeException e) { 529 onBeanSetterException(pMeta, e); 530 throw e; 531 } 532 } 533 } 534 HtmlTag t = nextTag(r, xTD, xTR); 535 if (t == xTD) 536 nextTag(r, xTR); 537 } 538 return m; 539 } 540 541 /* 542 * Reads the next tag. Advances past anything that's not a start or end tag. Throws an exception if 543 * it's not one of the expected tags. 544 * Precondition: Must be pointing before the event we want to parse. 545 * Postcondition: Pointing at the tag just parsed. 546 */ 547 private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException { 548 int et = r.next(); 549 550 while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT) 551 et = r.next(); 552 553 if (et == END_DOCUMENT) 554 throw new ParseException(this, "Unexpected end of document."); 555 556 HtmlTag tag = HtmlTag.forEvent(this, r); 557 if (expected.length == 0) 558 return tag; 559 for (HtmlTag t : expected) 560 if (t == tag) 561 return tag; 562 563 throw new ParseException(this, "Unexpected tag: ''{0}''. Expected one of the following: {1}", tag, expected); 564 } 565 566 /* 567 * Skips over the current element and advances to the next element. 568 * <p> 569 * Precondition: Pointing to opening tag. 570 * Postcondition: Pointing to next opening tag. 571 * 572 * @param r The stream being read from. 573 * @throws XMLStreamException 574 */ 575 private void skipTag(XmlReader r) throws ParseException, XMLStreamException { 576 int et = r.getEventType(); 577 578 if (et != START_ELEMENT) 579 throw new ParseException(this, 580 "skipToNextTag() call on invalid event ''{0}''. Must only be called on START_ELEMENT events.", 581 XmlUtils.toReadableEvent(r) 582 ); 583 584 String n = r.getLocalName(); 585 586 int depth = 0; 587 while (true) { 588 et = r.next(); 589 if (et == START_ELEMENT) { 590 String n2 = r.getLocalName(); 591 if (n.equals(n2)) 592 depth++; 593 } else if (et == END_ELEMENT) { 594 String n2 = r.getLocalName(); 595 if (n.equals(n2)) 596 depth--; 597 if (depth < 0) 598 return; 599 } 600 } 601 } 602 603 private void skipTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException { 604 HtmlTag tag = HtmlTag.forEvent(this, r); 605 if (tag.isOneOf(expected)) 606 r.next(); 607 else 608 throw new ParseException(this, 609 "Unexpected tag: ''{0}''. Expected one of the following: {1}", 610 tag, expected); 611 } 612 613 private static int skipWs(XmlReader r) throws XMLStreamException { 614 int event = r.getEventType(); 615 while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace()) 616 event = r.next(); 617 return event; 618 } 619 620 /** 621 * Parses CHARACTERS data. 622 * 623 * <p> 624 * Precondition: Pointing to event immediately following opening tag. 625 * Postcondition: Pointing to closing tag. 626 * 627 * @param r The stream being read from. 628 * @return The parsed string. 629 * @throws XMLStreamException Thrown by underlying XML stream. 630 */ 631 @Override /* XmlParserSession */ 632 protected final String parseText(XmlReader r) throws IOException, ParseException, XMLStreamException { 633 634 StringBuilder sb = getStringBuilder(); 635 636 int et = r.getEventType(); 637 if (et == END_ELEMENT) 638 return ""; 639 640 int depth = 0; 641 642 String characters = null; 643 644 while (true) { 645 if (et == START_ELEMENT) { 646 if (characters != null) { 647 if (sb.length() == 0) 648 characters = trimStart(characters); 649 sb.append(characters); 650 characters = null; 651 } 652 HtmlTag tag = HtmlTag.forEvent(this, r); 653 if (tag == BR) { 654 sb.append('\n'); 655 r.nextTag(); 656 } else if (tag == BS) { 657 sb.append('\b'); 658 r.nextTag(); 659 } else if (tag == SP) { 660 et = r.next(); 661 if (et == CHARACTERS) { 662 String s = r.getText(); 663 if (s.length() > 0) { 664 char c = r.getText().charAt(0); 665 if (c == '\u2003') 666 c = '\t'; 667 sb.append(c); 668 } 669 r.nextTag(); 670 } 671 } else if (tag == FF) { 672 sb.append('\f'); 673 r.nextTag(); 674 } else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) { 675 et = r.next(); 676 if (et == CHARACTERS) { 677 sb.append(r.getText()); 678 r.nextTag(); 679 } 680 } else { 681 sb.append('<').append(r.getLocalName()); 682 for (int i = 0; i < r.getAttributeCount(); i++) 683 sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\''); 684 sb.append('>'); 685 depth++; 686 } 687 } else if (et == END_ELEMENT) { 688 if (characters != null) { 689 if (sb.length() == 0) 690 characters = trimStart(characters); 691 if (depth == 0) 692 characters = trimEnd(characters); 693 sb.append(characters); 694 characters = null; 695 } 696 if (depth == 0) 697 break; 698 sb.append('<').append(r.getLocalName()).append('>'); 699 depth--; 700 } else if (et == CHARACTERS) { 701 characters = r.getText(); 702 } 703 et = r.next(); 704 } 705 706 String s = trim(sb.toString()); 707 returnStringBuilder(sb); 708 return s; 709 } 710 711 /** 712 * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag. 713 * 714 * <p> 715 * Precondition: Pointing to opening tag. 716 * Postcondition: Pointing to closing tag. 717 * 718 * @param r The stream being read from. 719 * @return The parsed string. 720 * @throws XMLStreamException Thrown by underlying XML stream. 721 * @throws ParseException Malformed input encountered. 722 */ 723 @Override /* XmlParserSession */ 724 protected final String getElementText(XmlReader r) throws IOException, XMLStreamException, ParseException { 725 r.next(); 726 return parseText(r); 727 } 728 729 @Override /* XmlParserSession */ 730 protected final boolean isWhitespaceElement(XmlReader r) { 731 String s = r.getLocalName(); 732 return whitespaceElements.contains(s); 733 } 734 735 @Override /* XmlParserSession */ 736 protected final String parseWhitespaceElement(XmlReader r) throws IOException, ParseException, XMLStreamException { 737 738 HtmlTag tag = HtmlTag.forEvent(this, r); 739 int et = r.next(); 740 if (tag == BR) { 741 return "\n"; 742 } else if (tag == BS) { 743 return "\b"; 744 } else if (tag == FF) { 745 return "\f"; 746 } else if (tag == SP) { 747 if (et == CHARACTERS) { 748 String s = r.getText(); 749 if (s.charAt(0) == '\u2003') 750 s = "\t"; 751 r.next(); 752 return decodeString(s); 753 } 754 return ""; 755 } else { 756 throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag); 757 } 758 } 759 760 //----------------------------------------------------------------------------------------------------------------- 761 // Extended metadata 762 //----------------------------------------------------------------------------------------------------------------- 763 764 /** 765 * Returns the language-specific metadata on the specified class. 766 * 767 * @param cm The class to return the metadata on. 768 * @return The metadata. 769 */ 770 protected HtmlClassMeta getHtmlClassMeta(ClassMeta<?> cm) { 771 return ctx.getHtmlClassMeta(cm); 772 } 773 774 /** 775 * Returns the language-specific metadata on the specified bean property. 776 * 777 * @param bpm The bean property to return the metadata on. 778 * @return The metadata. 779 */ 780 protected HtmlBeanPropertyMeta getHtmlBeanPropertyMeta(BeanPropertyMeta bpm) { 781 return ctx.getHtmlBeanPropertyMeta(bpm); 782 } 783 784 //----------------------------------------------------------------------------------------------------------------- 785 // Other methods 786 //----------------------------------------------------------------------------------------------------------------- 787 788 @Override /* Session */ 789 public OMap toMap() { 790 return super.toMap() 791 .a("HtmlParserSession", new DefaultFilteringOMap() 792 ); 793 } 794}