// ***************************************************************************************************************************
// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
// * with the License.  You may obtain a copy of the License at                                                              *
// *                                                                                                                         *
// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
// *                                                                                                                         *
// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
// * specific language governing permissions and limitations under the License.                                              *
// ***************************************************************************************************************************
package org.apache.juneau.html;

import static javax.xml.stream.XMLStreamConstants.*;
import static org.apache.juneau.html.HtmlTag.*;
import static org.apache.juneau.internal.StringUtils.*;

import java.io.IOException;
import java.lang.reflect.*;
import java.util.*;

import javax.xml.stream.*;

import org.apache.juneau.*;
import org.apache.juneau.html.annotation.*;
import org.apache.juneau.parser.*;
import org.apache.juneau.transform.*;
import org.apache.juneau.xml.*;

/**
 * Session object that lives for the duration of a single use of {@link HtmlParser}.
 *
 * <p>
 * This class is NOT thread safe.
 * It is typically discarded after one-time use although it can be reused against multiple inputs.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public final class HtmlParserSession extends XmlParserSession {

	/* Local names of the elements that encode whitespace characters (see parseWhitespaceElement). */
	private static final Set<String> whitespaceElements = new HashSet<>(Arrays.asList("br", "bs", "sp", "ff"));

	private final HtmlParser ctx;

	/**
	 * Create a new session using properties specified in the context.
	 *
	 * @param ctx
	 * 	The context creating this session object.
	 * 	The context contains all the configuration settings for this object.
	 * @param args
	 * 	Runtime session arguments.
	 */
	protected HtmlParserSession(HtmlParser ctx, ParserSessionArgs args) {
		super(ctx, args);
		this.ctx = ctx;
	}

	@Override /* ParserSession */
	protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException {
		try {
			return parseAnything(type, getXmlReader(pipe), getOuter(), true, null);
		} catch (XMLStreamException e) {
			// Surface low-level stream failures as parse failures, keeping the cause.
			throw new ParseException(e);
		}
	}

	@Override /* ReaderParserSession */
	protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType)
			throws Exception {
		return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType),
			(ClassMeta<V>)getClassMeta(valueType), null);
	}

	@Override /* ReaderParserSession */
	protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType)
			throws Exception {
		return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null);
	}

	/*
	 * Reads anything starting at the current event.
	 * <p>
	 * Precondition: Must be pointing at outer START_ELEMENT.
	 * Postcondition: Pointing at outer END_ELEMENT.
	 *
	 * eType is the type requested by the caller (defaults to Object when null).
	 * sType is the type actually read from the stream: the builder type if the class has a builder swap,
	 * else the swapped type if it has a POJO swap, else eType itself.
	 */
	private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {

		if (eType == null)
			eType = (ClassMeta<T>)object();
		PojoSwap<T,Object> swap = (PojoSwap<T,Object>)eType.getPojoSwap(this);
		BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this);
		ClassMeta<?> sType = null;
		if (builder != null)
			sType = builder.getBuilderClassMeta(this);
		else if (swap != null)
			sType = swap.getSwapClassMeta(this);
		else
			sType = eType;

		// Optional<X> is parsed as X and then wrapped.
		if (sType.isOptional())
			return (T)Optional.ofNullable(parseAnything(eType.getElementType(), r, outer, isRoot, pMeta));

		setCurrentClass(sType);

		int event = r.getEventType();
		if (event != START_ELEMENT)
			throw new ParseException(this, "parseAnything must be called on outer start element.");

		// For non-root values the current element is a wrapper (e.g. <td>, <li>); step inside it.
		if (! isRoot)
			event = r.next();
		boolean isEmpty = (event == END_ELEMENT);

		// Skip until we find a start element, end document, or non-empty text.
		if (! isEmpty)
			event = skipWs(r);

		if (event == END_DOCUMENT)
			throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType);

		// Handle @Html(asXml=true) beans.
		HtmlClassMeta hcm = getHtmlClassMeta(sType);
		if (hcm.getFormat() == HtmlFormat.XML)
			return super.parseAnything(eType, null, r, outer, false, pMeta);

		Object o = null;

		boolean isValid = true;
		HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));

		// If it's not a known tag, then parse it as XML.
		// Allows us to parse stuff like "<div/>" into HTML5 beans.
		if (tag == null && event != CHARACTERS)
			return super.parseAnything(eType, null, r, outer, false, pMeta);

		if (tag == HTML)
			tag = skipToData(r);

		if (isEmpty) {
			o = "";
		} else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) {
			// Plain text, possibly starting with a whitespace-encoding element.
			String text = parseText(r);
			if (sType.isObject() || sType.isCharSequence())
				o = text;
			else if (sType.isChar())
				o = parseCharacter(text);
			else if (sType.isBoolean())
				o = Boolean.parseBoolean(text);
			else if (sType.isNumber())
				// Parse into the type being read (sType), consistent with the <number> branch below;
				// eType may be an unswapped, non-numeric class when a swap is in effect.
				o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass());
			else if (sType.canCreateNewInstanceFromString(outer))
				o = sType.newInstanceFromString(outer, text);
			else
				isValid = false;

		} else if (tag == STRING || (tag == A && pMeta != null && getHtmlBeanPropertyMeta(pMeta).getLink() != null)) {
			String text = getElementText(r);
			if (sType.isObject() || sType.isCharSequence())
				o = text;
			else if (sType.isChar())
				o = parseCharacter(text);
			else if (sType.canCreateNewInstanceFromString(outer))
				o = sType.newInstanceFromString(outer, text);
			else
				isValid = false;
			skipTag(r, tag == STRING ? xSTRING : xA);

		} else if (tag == NUMBER) {
			String text = getElementText(r);
			if (sType.isObject())
				o = parseNumber(text, Number.class);
			else if (sType.isNumber())
				o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass());
			else
				isValid = false;
			skipTag(r, xNUMBER);

		} else if (tag == BOOLEAN) {
			String text = getElementText(r);
			if (sType.isObject() || sType.isBoolean())
				o = Boolean.parseBoolean(text);
			else
				isValid = false;
			skipTag(r, xBOOLEAN);

		} else if (tag == P) {
			// The only <p> content accepted here is the literal "No Results" marker; the result stays null.
			String text = getElementText(r);
			if (! "No Results".equals(text))
				isValid = false;
			skipTag(r, xP);

		} else if (tag == NULL) {
			skipTag(r, NULL);
			skipTag(r, xNULL);

		} else if (tag == A) {
			o = parseAnchor(r, eType);
			skipTag(r, xA);

		} else if (tag == TABLE) {

			// The bean-type attribute on <table> is either a dictionary type name or "object"/"array".
			String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object");
			ClassMeta cm = getClassMeta(typeName, pMeta, eType);

			if (cm != null) {
				sType = eType = cm;
				typeName = sType.isCollectionOrArray() ? "array" : "object";
			} else if (! "array".equals(typeName)) {
				// Type name could be a subtype name.
				typeName = sType.isCollectionOrArray() ? "array" : "object";
			}

			if (typeName.equals("object")) {
				if (sType.isObject()) {
					o = parseIntoMap(r, (Map)new ObjectMap(this), sType.getKeyType(), sType.getValueType(),
						pMeta);
				} else if (sType.isMap()) {
					o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer)
						: new ObjectMap(this)), sType.getKeyType(), sType.getValueType(), pMeta);
				} else if (builder != null) {
					BeanMap m = toBeanMap(builder.create(this, eType));
					o = builder.build(this, parseIntoBean(r, m).getBean(), eType);
				} else if (sType.canCreateNewBean(outer)) {
					BeanMap m = newBeanMap(outer, sType.getInnerClass());
					o = parseIntoBean(r, m).getBean();
				} else {
					isValid = false;
				}
				skipTag(r, xTABLE);

			} else if (typeName.equals("array")) {
				if (sType.isObject())
					o = parseTableIntoCollection(r, (Collection)new ObjectList(this), sType, pMeta);
				else if (sType.isCollection())
					o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
						? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta);
				else if (sType.isArray() || sType.isArgs()) {
					ArrayList l = (ArrayList)parseTableIntoCollection(r, new ArrayList(), sType, pMeta);
					o = toArray(sType, l);
				}
				else
					isValid = false;
				skipTag(r, xTABLE);

			} else {
				isValid = false;
			}

		} else if (tag == UL) {
			String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array");
			ClassMeta cm = getClassMeta(typeName, pMeta, eType);
			if (cm != null)
				sType = eType = cm;

			if (sType.isObject())
				o = parseIntoCollection(r, new ObjectList(this), sType, pMeta);
			else if (sType.isCollection())
				o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
					? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta);
			else if (sType.isArray() || sType.isArgs())
				o = toArray(sType, parseIntoCollection(r, new ArrayList(), sType, pMeta));
			else
				isValid = false;
			skipTag(r, xUL);

		}

		if (! isValid)
			throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType);

		if (swap != null && o != null)
			o = unswap(swap, o, eType);

		if (outer != null)
			setParent(eType, o, outer);

		skipWs(r);
		return (T)o;
	}

	/*
	 * For parsing output from HtmlDocSerializer, this skips over the head, title, and links.
	 * <p>
	 * Advances to the first tag inside <div id='data'> and returns it (null if positioned on text).
	 * Throws a ParseException if the document ends before such a <div> is found.
	 */
	private HtmlTag skipToData(XmlReader r) throws ParseException, XMLStreamException {
		while (true) {
			int event = r.next();
			// Stop cleanly at the end of the document instead of reading past it.
			if (event == END_DOCUMENT)
				throw new ParseException(this, "Unexpected end of stream looking for data.");
			if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) {
				r.nextTag();
				event = r.getEventType();
				boolean isEmpty = (event == END_ELEMENT);
				// Skip until we find a start element, end document, or non-empty text.
				if (! isEmpty)
					event = skipWs(r);
				if (event == END_DOCUMENT)
					throw new ParseException(this, "Unexpected end of stream looking for data.");
				return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
			}
		}
	}

	/*
	 * Returns the value of the attribute with the specified local name on the current element,
	 * or the specified default if the element has no such attribute.
	 */
	private static String getAttribute(XmlReader r, String name, String def) {
		for (int i = 0; i < r.getAttributeCount(); i++)
			if (r.getAttributeLocalName(i).equals(name))
				return r.getAttributeValue(i);
		return def;
	}

	/*
	 * Reads an anchor tag and converts it into a bean.
	 * <p>
	 * If the target type is annotated with @HtmlLink, the href and the element text are stored in the
	 * properties named by the annotation.  Otherwise the href alone is converted to the target type.
	 */
	private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType)
			throws IOException, ParseException, XMLStreamException {
		String href = r.getAttributeValue(null, "href");
		String name = getElementText(r);
		if (beanType.hasAnnotation(HtmlLink.class)) {
			HtmlLink h = beanType.getAnnotation(HtmlLink.class);
			BeanMap<T> m = newBeanMap(beanType.getInnerClass());
			m.put(h.uriProperty(), href);
			m.put(h.nameProperty(), name);
			return m.getBean();
		}
		return convertToType(href, beanType);
	}

	/*
	 * Returns all attributes of the current element as a map of local name to value, sorted by name.
	 */
	private static Map<String,String> getAttributes(XmlReader r) {
		Map<String,String> m = new TreeMap<>();
		for (int i = 0; i < r.getAttributeCount(); i++)
			m.put(r.getAttributeLocalName(i), r.getAttributeValue(i));
		return m;
	}

	/*
	 * Reads contents of <table> element as key/value rows into the specified map.
	 * Precondition: Must be pointing at <table> event.
	 * Postcondition: Pointing at the closing </table> tag (the loop ends when it is read).
	 */
	private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType,
			ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
		while (true) {
			HtmlTag tag = nextTag(r, TR, xTABLE);
			if (tag == xTABLE)
				break;
			tag = nextTag(r, TD, TH);
			// Skip over the column headers.
			if (tag == TH) {
				skipTag(r);
				r.nextTag();
				skipTag(r);
			} else {
				K key = parseAnything(keyType, r, m, false, pMeta);
				nextTag(r, TD);
				V value = parseAnything(valueType, r, m, false, pMeta);
				setName(valueType, value, key);
				m.put(key, value);
			}
			nextTag(r, xTR);
		}

		return m;
	}

	/*
	 * Reads contents of <ul> element into the specified collection, one entry per <li>.
	 * Precondition: Must be pointing at <ul> event (the next tag read must be <li> or </ul>).
	 * Postcondition: Pointing at the closing </ul> tag (the loop ends when it is read).
	 */
	private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l,
			ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
		int argIndex = 0;
		while (true) {
			HtmlTag tag = nextTag(r, LI, xUL);
			if (tag == xUL)
				break;
			// For "args" types each position has its own type; otherwise all entries share the element type.
			ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
			l.add((E)parseAnything(elementType, r, l, false, pMeta));
		}
		return l;
	}

	/*
	 * Reads contents of <table> element whose first row is a header row of <th> column names and
	 * whose remaining rows are entries of the collection (beans or maps keyed by the column names).
	 * Precondition: Must be pointing at <table> event (the next tag read must be the header <tr>).
	 * Postcondition: Pointing at the closing </table> tag (the loop ends when it is read).
	 */
	private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l,
			ClassMeta<E> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {

		// Header row: collect the column names.
		HtmlTag tag = nextTag(r, TR);
		List<String> keys = new ArrayList<>();
		while (true) {
			tag = nextTag(r, TH, xTR);
			if (tag == xTR)
				break;
			keys.add(getElementText(r));
		}

		int argIndex = 0;

		// Data rows.
		while (true) {
			r.nextTag();
			tag = HtmlTag.forEvent(this, r);
			if (tag == xTABLE)
				break;

			// Resolve the row type: explicit bean-type attribute on the row, else the declared element type, else Object.
			ClassMeta elementType = null;
			String beanType = getAttribute(r, getBeanTypePropertyName(type), null);
			if (beanType != null)
				elementType = getClassMeta(beanType, pMeta, null);
			if (elementType == null)
				elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
			if (elementType == null)
				elementType = object();

			BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this);

			if (builder != null || elementType.canCreateNewBean(l)) {
				BeanMap m =
					builder != null
					? toBeanMap(builder.create(this, elementType))
					: newBeanMap(l, elementType.getInnerClass())
				;
				for (int i = 0; i < keys.size(); i++) {
					tag = nextTag(r, TD, NULL);
					// A <null> in place of a cell means the whole row is a null entry.
					if (tag == NULL) {
						m = null;
						nextTag(r, xNULL);
						break;
					}
					String key = keys.get(i);
					BeanMapEntry e = m.getProperty(key);
					if (e == null) {
						// Unknown column: the cell is parsed and discarded.
						// Unlike parseIntoBean, onUnknownProperty is not invoked here.
						parseAnything(object(), r, l, false, null);
					} else {
						BeanPropertyMeta bpm = e.getMeta();
						ClassMeta<?> cm = bpm.getClassMeta();
						Object value = parseAnything(cm, r, m.getBean(false), false, bpm);
						setName(cm, value, key);
						bpm.set(m, key, value);
					}
				}
				l.add(
					m == null
					? null
					: builder != null
						? builder.build(this, m.getBean(), elementType)
						: (E)m.getBean()
				);
			} else {
				// Not a bean: read the row into a map, remembering any bean-type attribute for a later cast.
				String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType()));
				Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l)
					: new ObjectMap(this));
				for (int i = 0; i < keys.size(); i++) {
					tag = nextTag(r, TD, NULL);
					if (tag == NULL) {
						m = null;
						nextTag(r, xNULL);
						break;
					}
					String key = keys.get(i);
					if (m != null) {
						ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType();
						Object value = parseAnything(vt, r, l, false, pMeta);
						setName(vt, value, key);
						m.put(convertToType(key, kt), value);
					}
				}
				if (m != null && c != null) {
					ObjectMap m2 = (m instanceof ObjectMap ? (ObjectMap)m : new ObjectMap(m).setBeanSession(this));
					m2.put(getBeanTypePropertyName(type.getElementType()), c);
					l.add((E)cast(m2, pMeta, elementType));
				} else {
					l.add((E)m);
				}
			}
			nextTag(r, xTR);
		}
		return l;
	}

	/*
	 * Reads contents of <table> element as property-name/value rows into the specified bean map.
	 * Precondition: Must be pointing at <table> event (the next tag read must be <tr> or </table>).
	 * Postcondition: Pointing at the closing </table> tag (the loop ends when it is read).
	 */
	private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws IOException, ParseException, ExecutableException, XMLStreamException {
		while (true) {
			HtmlTag tag = nextTag(r, TR, xTABLE);
			if (tag == xTABLE)
				break;
			tag = nextTag(r, TD, TH);
			// Skip over the column headers.
			if (tag == TH) {
				skipTag(r);
				r.nextTag();
				skipTag(r);
			} else {
				String key = getElementText(r);
				nextTag(r, TD);
				BeanPropertyMeta pMeta = m.getPropertyMeta(key);
				if (pMeta == null) {
					// Report the unknown property, then consume its value so the stream stays in sync.
					onUnknownProperty(key, m);
					parseAnything(object(), r, null, false, null);
				} else {
					ClassMeta<?> cm = pMeta.getClassMeta();
					Object value = parseAnything(cm, r, m.getBean(false), false, pMeta);
					setName(cm, value, key);
					pMeta.set(m, key, value);
				}
			}
			// The value cell's closing </td> may or may not have been consumed already.
			HtmlTag t = nextTag(r, xTD, xTR);
			if (t == xTD)
				nextTag(r, xTR);
		}
		return m;
	}

	/*
	 * Reads the next tag.  Advances past anything that's not a start or end tag.  Throws an exception if
	 * it's not one of the expected tags.  If no expected tags are specified, any tag is accepted.
	 * Precondition: Must be pointing before the event we want to parse.
	 * Postcondition: Pointing at the tag just parsed.
	 */
	private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
		int et = r.next();

		while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT)
			et = r.next();

		if (et == END_DOCUMENT)
			throw new ParseException(this, "Unexpected end of document.");

		HtmlTag tag = HtmlTag.forEvent(this, r);
		if (expected.length == 0)
			return tag;
		for (HtmlTag t : expected)
			if (t == tag)
				return tag;

		throw new ParseException(this, "Unexpected tag: ''{0}''. Expected one of the following: {1}", tag, expected);
	}

	/*
	 * Skips over the current element, including any nested elements of the same name.
	 * <p>
	 * Precondition: Pointing to opening tag.
	 * Postcondition: Pointing to the matching closing tag of that element.
	 *
	 * @param r The stream being read from.
	 * @throws ParseException If not currently positioned on a START_ELEMENT event.
	 * @throws XMLStreamException Thrown by underlying XML stream.
	 */
	private void skipTag(XmlReader r) throws ParseException, XMLStreamException {
		int et = r.getEventType();

		if (et != START_ELEMENT)
			throw new ParseException(this,
				"skipToNextTag() call on invalid event ''{0}''. Must only be called on START_ELEMENT events.",
				XmlUtils.toReadableEvent(r)
			);

		String n = r.getLocalName();

		// Track nesting of same-named elements so we stop at the end tag that matches the starting one.
		int depth = 0;
		while (true) {
			et = r.next();
			if (et == START_ELEMENT) {
				String n2 = r.getLocalName();
				if (n.equals(n2))
					depth++;
			} else if (et == END_ELEMENT) {
				String n2 = r.getLocalName();
				if (n.equals(n2))
					depth--;
				if (depth < 0)
					return;
			}
		}
	}

	/*
	 * Verifies that the current event is one of the expected tags and advances one event past it.
	 * Throws a ParseException if the current tag is not one of the expected tags.
	 */
	private void skipTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
		HtmlTag tag = HtmlTag.forEvent(this, r);
		if (tag.isOneOf(expected))
			r.next();
		else
			throw new ParseException(this,
				"Unexpected tag: ''{0}''. Expected one of the following: {1}",
				tag, expected);
	}

	/*
	 * Advances past whitespace-only events.  Stops at the first start element, end element,
	 * end of document, or non-whitespace event, and returns that event type.
	 */
	private static int skipWs(XmlReader r) throws XMLStreamException {
		int event = r.getEventType();
		while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace())
			event = r.next();
		return event;
	}

	/**
	 * Parses CHARACTERS data.
	 *
	 * <p>
	 * Whitespace-encoding elements (<c>br</c>, <c>bs</c>, <c>sp</c>, <c>ff</c>) are converted back to the characters
	 * they represent, the text of <c>string</c>, <c>number</c> and <c>boolean</c> elements is appended as-is, and any
	 * other nested elements are re-rendered as markup.
	 *
	 * <p>
	 * Precondition: Pointing to event immediately following opening tag.
	 * Postcondition: Pointing to closing tag.
	 *
	 * @param r The stream being read from.
	 * @return The parsed string.
	 * @throws XMLStreamException Thrown by underlying XML stream.
	 */
	@Override /* XmlParserSession */
	protected final String parseText(XmlReader r) throws IOException, ParseException, XMLStreamException {

		int et = r.getEventType();
		// Empty element: return before borrowing a StringBuilder so none is left unreturned.
		if (et == END_ELEMENT)
			return "";

		StringBuilder sb = getStringBuilder();

		// Nesting depth of unrecognized elements being echoed back as markup.
		int depth = 0;

		// Most recent CHARACTERS text, held back until we know whether it needs trimming.
		String characters = null;

		while (true) {
			if (et == START_ELEMENT) {
				if (characters != null) {
					if (sb.length() == 0)
						characters = trimStart(characters);
					sb.append(characters);
					characters = null;
				}
				HtmlTag tag = HtmlTag.forEvent(this, r);
				if (tag == BR) {
					sb.append('\n');
					r.nextTag();
				} else if (tag == BS) {
					sb.append('\b');
					r.nextTag();
				} else if (tag == SP) {
					et = r.next();
					if (et == CHARACTERS) {
						String s = r.getText();
						if (s.length() > 0) {
							char c = s.charAt(0);
							// An em-space inside <sp> encodes a tab.
							if (c == '\u2003')
								c = '\t';
							sb.append(c);
						}
						r.nextTag();
					}
				} else if (tag == FF) {
					sb.append('\f');
					r.nextTag();
				} else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) {
					et = r.next();
					if (et == CHARACTERS) {
						sb.append(r.getText());
						r.nextTag();
					}
				} else {
					sb.append('<').append(r.getLocalName());
					for (int i = 0; i < r.getAttributeCount(); i++)
						sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\'');
					sb.append('>');
					depth++;
				}
			} else if (et == END_ELEMENT) {
				if (characters != null) {
					if (sb.length() == 0)
						characters = trimStart(characters);
					if (depth == 0)
						characters = trimEnd(characters);
					sb.append(characters);
					characters = null;
				}
				// The end tag at depth 0 closes the element whose text we are reading.
				if (depth == 0)
					break;
				sb.append('<').append(r.getLocalName()).append('>');
				depth--;
			} else if (et == CHARACTERS) {
				characters = r.getText();
			}
			et = r.next();
		}

		String s = trim(sb.toString());
		returnStringBuilder(sb);
		return s;
	}

	/**
	 * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag.
	 *
	 * <p>
	 * Precondition: Pointing to opening tag.
	 * Postcondition: Pointing to closing tag.
	 *
	 * @param r The stream being read from.
	 * @return The parsed string.
	 * @throws XMLStreamException Thrown by underlying XML stream.
	 * @throws ParseException Malformed input encountered.
	 */
	@Override /* XmlParserSession */
	protected final String getElementText(XmlReader r) throws IOException, XMLStreamException, ParseException {
		r.next();
		return parseText(r);
	}

	@Override /* XmlParserSession */
	protected final boolean isWhitespaceElement(XmlReader r) {
		String s = r.getLocalName();
		return whitespaceElements.contains(s);
	}

	@Override /* XmlParserSession */
	protected final String parseWhitespaceElement(XmlReader r) throws IOException, ParseException, XMLStreamException {

		HtmlTag tag = HtmlTag.forEvent(this, r);
		int et = r.next();
		if (tag == BR) {
			return "\n";
		} else if (tag == BS) {
			return "\b";
		} else if (tag == FF) {
			return "\f";
		} else if (tag == SP) {
			if (et == CHARACTERS) {
				String s = r.getText();
				// An em-space inside <sp> encodes a tab.  Guard against empty text, as parseText does.
				if (! s.isEmpty() && s.charAt(0) == '\u2003')
					s = "\t";
				r.next();
				return decodeString(s);
			}
			return "";
		} else {
			throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag);
		}
	}

	//-----------------------------------------------------------------------------------------------------------------
	// Extended metadata
	//-----------------------------------------------------------------------------------------------------------------

	/**
	 * Returns the language-specific metadata on the specified class.
	 *
	 * @param cm The class to return the metadata on.
	 * @return The metadata.
	 */
	protected HtmlClassMeta getHtmlClassMeta(ClassMeta<?> cm) {
		return ctx.getHtmlClassMeta(cm);
	}

	/**
	 * Returns the language-specific metadata on the specified bean property.
	 *
	 * @param bpm The bean property to return the metadata on.
	 * @return The metadata.
	 */
	protected HtmlBeanPropertyMeta getHtmlBeanPropertyMeta(BeanPropertyMeta bpm) {
		return ctx.getHtmlBeanPropertyMeta(bpm);
	}

	//-----------------------------------------------------------------------------------------------------------------
	// Other methods
	//-----------------------------------------------------------------------------------------------------------------

	@Override /* Session */
	public ObjectMap toMap() {
		return super.toMap()
			.append("HtmlParserSession", new DefaultFilteringObjectMap()
			);
	}
}