001// *************************************************************************************************************************** 002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file * 003// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * 004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance * 005// * with the License. You may obtain a copy of the License at * 006// * * 007// * http://www.apache.org/licenses/LICENSE-2.0 * 008// * * 009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an * 010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * 011// * specific language governing permissions and limitations under the License. * 012// *************************************************************************************************************************** 013package org.apache.juneau.html; 014 015import static javax.xml.stream.XMLStreamConstants.*; 016import static org.apache.juneau.html.HtmlTag.*; 017import static org.apache.juneau.internal.StringUtils.*; 018 019import java.io.IOException; 020import java.lang.reflect.*; 021import java.util.*; 022 023import javax.xml.stream.*; 024 025import org.apache.juneau.*; 026import org.apache.juneau.html.annotation.*; 027import org.apache.juneau.parser.*; 028import org.apache.juneau.transform.*; 029import org.apache.juneau.xml.*; 030 031/** 032 * Session object that lives for the duration of a single use of {@link HtmlParser}. 033 * 034 * <p> 035 * This class is NOT thread safe. 036 * It is typically discarded after one-time use although it can be reused against multiple inputs. 037 */ 038@SuppressWarnings({ "unchecked", "rawtypes" }) 039public final class HtmlParserSession extends XmlParserSession { 040 041 private static final Set<String> whitespaceElements = new HashSet<>( 042 Arrays.asList( 043 new String[]{"br","bs","sp","ff"} 044 ) 045 ); 046 047 /** 048 * Create a new session using properties specified in the context. 049 * 050 * @param ctx 051 * The context creating this session object. 052 * The context contains all the configuration settings for this object. 053 * @param args 054 * Runtime session arguments. 055 */ 056 protected HtmlParserSession(HtmlParser ctx, ParserSessionArgs args) { 057 super(ctx, args); 058 } 059 060 @Override /* ParserSession */ 061 protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException { 062 try { 063 return parseAnything(type, getXmlReader(pipe), getOuter(), true, null); 064 } catch (XMLStreamException e) { 065 throw new ParseException(e); 066 } 067 } 068 069 @Override /* ReaderParserSession */ 070 protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType) 071 throws Exception { 072 return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType), 073 (ClassMeta<V>)getClassMeta(valueType), null); 074 } 075 076 @Override /* ReaderParserSession */ 077 protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType) 078 throws Exception { 079 return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null); 080 } 081 082 /* 083 * Reads anything starting at the current event. 084 * <p> 085 * Precondition: Must be pointing at outer START_ELEMENT. 086 * Postcondition: Pointing at outer END_ELEMENT. 087 */ 088 private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 089 090 if (eType == null) 091 eType = (ClassMeta<T>)object(); 092 PojoSwap<T,Object> swap = (PojoSwap<T,Object>)eType.getPojoSwap(this); 093 BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this); 094 ClassMeta<?> sType = null; 095 if (builder != null) 096 sType = builder.getBuilderClassMeta(this); 097 else if (swap != null) 098 sType = swap.getSwapClassMeta(this); 099 else 100 sType = eType; 101 102 if (sType.isOptional()) 103 return (T)Optional.ofNullable(parseAnything(eType.getElementType(), r, outer, isRoot, pMeta)); 104 105 setCurrentClass(sType); 106 107 int event = r.getEventType(); 108 if (event != START_ELEMENT) 109 throw new ParseException(this, "parseAnything must be called on outer start element."); 110 111 if (! isRoot) 112 event = r.next(); 113 boolean isEmpty = (event == END_ELEMENT); 114 115 // Skip until we find a start element, end document, or non-empty text. 116 if (! isEmpty) 117 event = skipWs(r); 118 119 if (event == END_DOCUMENT) 120 throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType); 121 122 // Handle @Html(asXml=true) beans. 123 HtmlClassMeta hcm = sType.getExtendedMeta(HtmlClassMeta.class); 124 if (hcm.getFormat() == HtmlFormat.XML) 125 return super.parseAnything(eType, null, r, outer, false, pMeta); 126 127 Object o = null; 128 129 boolean isValid = true; 130 HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false)); 131 132 // If it's not a known tag, then parse it as XML. 133 // Allows us to parse stuff like "<div/>" into HTML5 beans. 134 if (tag == null && event != CHARACTERS) 135 return super.parseAnything(eType, null, r, outer, false, pMeta); 136 137 if (tag == HTML) 138 tag = skipToData(r); 139 140 if (isEmpty) { 141 o = ""; 142 } else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) { 143 String text = parseText(r); 144 if (sType.isObject() || sType.isCharSequence()) 145 o = text; 146 else if (sType.isChar()) 147 o = parseCharacter(text); 148 else if (sType.isBoolean()) 149 o = Boolean.parseBoolean(text); 150 else if (sType.isNumber()) 151 o = parseNumber(text, (Class<? extends Number>)eType.getInnerClass()); 152 else if (sType.canCreateNewInstanceFromString(outer)) 153 o = sType.newInstanceFromString(outer, text); 154 else 155 isValid = false; 156 157 } else if (tag == STRING || (tag == A && pMeta != null 158 && pMeta.getExtendedMeta(HtmlBeanPropertyMeta.class).getLink() != null)) { 159 String text = getElementText(r); 160 if (sType.isObject() || sType.isCharSequence()) 161 o = text; 162 else if (sType.isChar()) 163 o = parseCharacter(text); 164 else if (sType.canCreateNewInstanceFromString(outer)) 165 o = sType.newInstanceFromString(outer, text); 166 else 167 isValid = false; 168 skipTag(r, tag == STRING ? xSTRING : xA); 169 170 } else if (tag == NUMBER) { 171 String text = getElementText(r); 172 if (sType.isObject()) 173 o = parseNumber(text, Number.class); 174 else if (sType.isNumber()) 175 o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass()); 176 else 177 isValid = false; 178 skipTag(r, xNUMBER); 179 180 } else if (tag == BOOLEAN) { 181 String text = getElementText(r); 182 if (sType.isObject() || sType.isBoolean()) 183 o = Boolean.parseBoolean(text); 184 else 185 isValid = false; 186 skipTag(r, xBOOLEAN); 187 188 } else if (tag == P) { 189 String text = getElementText(r); 190 if (! "No Results".equals(text)) 191 isValid = false; 192 skipTag(r, xP); 193 194 } else if (tag == NULL) { 195 skipTag(r, NULL); 196 skipTag(r, xNULL); 197 198 } else if (tag == A) { 199 o = parseAnchor(r, eType); 200 skipTag(r, xA); 201 202 } else if (tag == TABLE) { 203 204 String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object"); 205 ClassMeta cm = getClassMeta(typeName, pMeta, eType); 206 207 if (cm != null) { 208 sType = eType = cm; 209 typeName = sType.isCollectionOrArray() ? "array" : "object"; 210 } else if (! "array".equals(typeName)) { 211 // Type name could be a subtype name. 212 typeName = sType.isCollectionOrArray() ? "array" : "object"; 213 } 214 215 if (typeName.equals("object")) { 216 if (sType.isObject()) { 217 o = parseIntoMap(r, (Map)new ObjectMap(this), sType.getKeyType(), sType.getValueType(), 218 pMeta); 219 } else if (sType.isMap()) { 220 o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer) 221 : new ObjectMap(this)), sType.getKeyType(), sType.getValueType(), pMeta); 222 } else if (builder != null) { 223 BeanMap m = toBeanMap(builder.create(this, eType)); 224 o = builder.build(this, parseIntoBean(r, m).getBean(), eType); 225 } else if (sType.canCreateNewBean(outer)) { 226 BeanMap m = newBeanMap(outer, sType.getInnerClass()); 227 o = parseIntoBean(r, m).getBean(); 228 } else { 229 isValid = false; 230 } 231 skipTag(r, xTABLE); 232 233 } else if (typeName.equals("array")) { 234 if (sType.isObject()) 235 o = parseTableIntoCollection(r, (Collection)new ObjectList(this), sType, pMeta); 236 else if (sType.isCollection()) 237 o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) 238 ? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta); 239 else if (sType.isArray() || sType.isArgs()) { 240 ArrayList l = (ArrayList)parseTableIntoCollection(r, new ArrayList(), sType, pMeta); 241 o = toArray(sType, l); 242 } 243 else 244 isValid = false; 245 skipTag(r, xTABLE); 246 247 } else { 248 isValid = false; 249 } 250 251 } else if (tag == UL) { 252 String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array"); 253 ClassMeta cm = getClassMeta(typeName, pMeta, eType); 254 if (cm != null) 255 sType = eType = cm; 256 257 if (sType.isObject()) 258 o = parseIntoCollection(r, new ObjectList(this), sType, pMeta); 259 else if (sType.isCollection() || sType.isObject()) 260 o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) 261 ? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta); 262 else if (sType.isArray() || sType.isArgs()) 263 o = toArray(sType, parseIntoCollection(r, new ArrayList(), sType, pMeta)); 264 else 265 isValid = false; 266 skipTag(r, xUL); 267 268 } 269 270 if (! isValid) 271 throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType); 272 273 if (swap != null && o != null) 274 o = unswap(swap, o, eType); 275 276 if (outer != null) 277 setParent(eType, o, outer); 278 279 skipWs(r); 280 return (T)o; 281 } 282 283 /* 284 * For parsing output from HtmlDocSerializer, this skips over the head, title, and links. 285 */ 286 private HtmlTag skipToData(XmlReader r) throws ParseException, XMLStreamException { 287 while (true) { 288 int event = r.next(); 289 if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) { 290 r.nextTag(); 291 event = r.getEventType(); 292 boolean isEmpty = (event == END_ELEMENT); 293 // Skip until we find a start element, end document, or non-empty text. 294 if (! isEmpty) 295 event = skipWs(r); 296 if (event == END_DOCUMENT) 297 throw new ParseException(this, "Unexpected end of stream looking for data."); 298 return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false)); 299 } 300 } 301 } 302 303 private static String getAttribute(XmlReader r, String name, String def) { 304 for (int i = 0; i < r.getAttributeCount(); i++) 305 if (r.getAttributeLocalName(i).equals(name)) 306 return r.getAttributeValue(i); 307 return def; 308 } 309 310 /* 311 * Reads an anchor tag and converts it into a bean. 312 */ 313 private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType) 314 throws IOException, ParseException, XMLStreamException { 315 String href = r.getAttributeValue(null, "href"); 316 String name = getElementText(r); 317 Class<T> beanClass = beanType.getInnerClass(); 318 if (beanClass.isAnnotationPresent(HtmlLink.class)) { 319 HtmlLink h = beanClass.getAnnotation(HtmlLink.class); 320 BeanMap<T> m = newBeanMap(beanClass); 321 m.put(h.uriProperty(), href); 322 m.put(h.nameProperty(), name); 323 return m.getBean(); 324 } 325 return convertToType(href, beanType); 326 } 327 328 private static Map<String,String> getAttributes(XmlReader r) { 329 Map<String,String> m = new TreeMap<>() ; 330 for (int i = 0; i < r.getAttributeCount(); i++) 331 m.put(r.getAttributeLocalName(i), r.getAttributeValue(i)); 332 return m; 333 } 334 335 /* 336 * Reads contents of <table> element. 337 * Precondition: Must be pointing at <table> event. 338 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 339 */ 340 private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType, 341 ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 342 while (true) { 343 HtmlTag tag = nextTag(r, TR, xTABLE); 344 if (tag == xTABLE) 345 break; 346 tag = nextTag(r, TD, TH); 347 // Skip over the column headers. 348 if (tag == TH) { 349 skipTag(r); 350 r.nextTag(); 351 skipTag(r); 352 } else { 353 K key = parseAnything(keyType, r, m, false, pMeta); 354 nextTag(r, TD); 355 V value = parseAnything(valueType, r, m, false, pMeta); 356 setName(valueType, value, key); 357 m.put(key, value); 358 } 359 nextTag(r, xTR); 360 } 361 362 return m; 363 } 364 365 /* 366 * Reads contents of <ul> element. 367 * Precondition: Must be pointing at event following <ul> event. 368 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 369 */ 370 private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l, 371 ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 372 int argIndex = 0; 373 while (true) { 374 HtmlTag tag = nextTag(r, LI, xUL); 375 if (tag == xUL) 376 break; 377 ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType(); 378 l.add((E)parseAnything(elementType, r, l, false, pMeta)); 379 } 380 return l; 381 } 382 383 /* 384 * Reads contents of <ul> element. 385 * Precondition: Must be pointing at event following <ul> event. 386 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 387 */ 388 private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l, 389 ClassMeta<E> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 390 391 HtmlTag tag = nextTag(r, TR); 392 List<String> keys = new ArrayList<>(); 393 while (true) { 394 tag = nextTag(r, TH, xTR); 395 if (tag == xTR) 396 break; 397 keys.add(getElementText(r)); 398 } 399 400 int argIndex = 0; 401 402 while (true) { 403 r.nextTag(); 404 tag = HtmlTag.forEvent(this, r); 405 if (tag == xTABLE) 406 break; 407 408 ClassMeta elementType = null; 409 String beanType = getAttribute(r, getBeanTypePropertyName(type), null); 410 if (beanType != null) 411 elementType = getClassMeta(beanType, pMeta, null); 412 if (elementType == null) 413 elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType(); 414 if (elementType == null) 415 elementType = object(); 416 417 BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this); 418 419 if (builder != null || elementType.canCreateNewBean(l)) { 420 BeanMap m = 421 builder != null 422 ? toBeanMap(builder.create(this, elementType)) 423 : newBeanMap(l, elementType.getInnerClass()) 424 ; 425 for (int i = 0; i < keys.size(); i++) { 426 tag = nextTag(r, TD, NULL); 427 if (tag == NULL) { 428 m = null; 429 nextTag(r, xNULL); 430 break; 431 } 432 String key = keys.get(i); 433 BeanMapEntry e = m.getProperty(key); 434 if (e == null) { 435 //onUnknownProperty(key, m, -1, -1); 436 parseAnything(object(), r, l, false, null); 437 } else { 438 BeanPropertyMeta bpm = e.getMeta(); 439 ClassMeta<?> cm = bpm.getClassMeta(); 440 Object value = parseAnything(cm, r, m.getBean(false), false, bpm); 441 setName(cm, value, key); 442 bpm.set(m, key, value); 443 } 444 } 445 l.add( 446 m == null 447 ? null 448 : builder != null 449 ? builder.build(this, m.getBean(), elementType) 450 : (E)m.getBean() 451 ); 452 } else { 453 String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType())); 454 Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l) 455 : new ObjectMap(this)); 456 for (int i = 0; i < keys.size(); i++) { 457 tag = nextTag(r, TD, NULL); 458 if (tag == NULL) { 459 m = null; 460 nextTag(r, xNULL); 461 break; 462 } 463 String key = keys.get(i); 464 if (m != null) { 465 ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType(); 466 Object value = parseAnything(vt, r, l, false, pMeta); 467 setName(vt, value, key); 468 m.put(convertToType(key, kt), value); 469 } 470 } 471 if (m != null && c != null) { 472 ObjectMap m2 = (m instanceof ObjectMap ? (ObjectMap)m : new ObjectMap(m).setBeanSession(this)); 473 m2.put(getBeanTypePropertyName(type.getElementType()), c); 474 l.add((E)cast(m2, pMeta, elementType)); 475 } else { 476 l.add((E)m); 477 } 478 } 479 nextTag(r, xTR); 480 } 481 return l; 482 } 483 484 /* 485 * Reads contents of <table> element. 486 * Precondition: Must be pointing at event following <table> event. 487 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 488 */ 489 private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws IOException, ParseException, ExecutableException, XMLStreamException { 490 while (true) { 491 HtmlTag tag = nextTag(r, TR, xTABLE); 492 if (tag == xTABLE) 493 break; 494 tag = nextTag(r, TD, TH); 495 // Skip over the column headers. 496 if (tag == TH) { 497 skipTag(r); 498 r.nextTag(); 499 skipTag(r); 500 } else { 501 String key = getElementText(r); 502 nextTag(r, TD); 503 BeanPropertyMeta pMeta = m.getPropertyMeta(key); 504 if (pMeta == null) { 505 onUnknownProperty(key, m); 506 parseAnything(object(), r, null, false, null); 507 } else { 508 ClassMeta<?> cm = pMeta.getClassMeta(); 509 Object value = parseAnything(cm, r, m.getBean(false), false, pMeta); 510 setName(cm, value, key); 511 pMeta.set(m, key, value); 512 } 513 } 514 nextTag(r, xTR); 515 } 516 return m; 517 } 518 519 /* 520 * Reads the next tag. Advances past anything that's not a start or end tag. Throws an exception if 521 * it's not one of the expected tags. 522 * Precondition: Must be pointing before the event we want to parse. 523 * Postcondition: Pointing at the tag just parsed. 524 */ 525 private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException { 526 int et = r.next(); 527 528 while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT) 529 et = r.next(); 530 531 if (et == END_DOCUMENT) 532 throw new ParseException(this, "Unexpected end of document."); 533 534 HtmlTag tag = HtmlTag.forEvent(this, r); 535 if (expected.length == 0) 536 return tag; 537 for (HtmlTag t : expected) 538 if (t == tag) 539 return tag; 540 541 throw new ParseException(this, "Unexpected tag: ''{0}''. Expected one of the following: {1}", tag, expected); 542 } 543 544 /* 545 * Skips over the current element and advances to the next element. 546 * <p> 547 * Precondition: Pointing to opening tag. 548 * Postcondition: Pointing to next opening tag. 549 * 550 * @param r The stream being read from. 551 * @throws XMLStreamException 552 */ 553 private void skipTag(XmlReader r) throws ParseException, XMLStreamException { 554 int et = r.getEventType(); 555 556 if (et != START_ELEMENT) 557 throw new ParseException(this, 558 "skipToNextTag() call on invalid event ''{0}''. Must only be called on START_ELEMENT events.", 559 XmlUtils.toReadableEvent(r) 560 ); 561 562 String n = r.getLocalName(); 563 564 int depth = 0; 565 while (true) { 566 et = r.next(); 567 if (et == START_ELEMENT) { 568 String n2 = r.getLocalName(); 569 if (n.equals(n2)) 570 depth++; 571 } else if (et == END_ELEMENT) { 572 String n2 = r.getLocalName(); 573 if (n.equals(n2)) 574 depth--; 575 if (depth < 0) 576 return; 577 } 578 } 579 } 580 581 private void skipTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException { 582 HtmlTag tag = HtmlTag.forEvent(this, r); 583 if (tag.isOneOf(expected)) 584 r.next(); 585 else 586 throw new ParseException(this, 587 "Unexpected tag: ''{0}''. Expected one of the following: {1}", 588 tag, expected); 589 } 590 591 private static int skipWs(XmlReader r) throws XMLStreamException { 592 int event = r.getEventType(); 593 while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace()) 594 event = r.next(); 595 return event; 596 } 597 598 /** 599 * Parses CHARACTERS data. 600 * 601 * <p> 602 * Precondition: Pointing to event immediately following opening tag. 603 * Postcondition: Pointing to closing tag. 604 * 605 * @param r The stream being read from. 606 * @return The parsed string. 607 * @throws XMLStreamException Thrown by underlying XML stream. 608 */ 609 @Override /* XmlParserSession */ 610 protected final String parseText(XmlReader r) throws IOException, ParseException, XMLStreamException { 611 612 StringBuilder sb = getStringBuilder(); 613 614 int et = r.getEventType(); 615 if (et == END_ELEMENT) 616 return ""; 617 618 int depth = 0; 619 620 String characters = null; 621 622 while (true) { 623 if (et == START_ELEMENT) { 624 if (characters != null) { 625 if (sb.length() == 0) 626 characters = trimStart(characters); 627 sb.append(characters); 628 characters = null; 629 } 630 HtmlTag tag = HtmlTag.forEvent(this, r); 631 if (tag == BR) { 632 sb.append('\n'); 633 r.nextTag(); 634 } else if (tag == BS) { 635 sb.append('\b'); 636 r.nextTag(); 637 } else if (tag == SP) { 638 et = r.next(); 639 if (et == CHARACTERS) { 640 String s = r.getText(); 641 if (s.length() > 0) { 642 char c = r.getText().charAt(0); 643 if (c == '\u2003') 644 c = '\t'; 645 sb.append(c); 646 } 647 r.nextTag(); 648 } 649 } else if (tag == FF) { 650 sb.append('\f'); 651 r.nextTag(); 652 } else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) { 653 et = r.next(); 654 if (et == CHARACTERS) { 655 sb.append(r.getText()); 656 r.nextTag(); 657 } 658 } else { 659 sb.append('<').append(r.getLocalName()); 660 for (int i = 0; i < r.getAttributeCount(); i++) 661 sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\''); 662 sb.append('>'); 663 depth++; 664 } 665 } else if (et == END_ELEMENT) { 666 if (characters != null) { 667 if (sb.length() == 0) 668 characters = trimStart(characters); 669 if (depth == 0) 670 characters = trimEnd(characters); 671 sb.append(characters); 672 characters = null; 673 } 674 if (depth == 0) 675 break; 676 sb.append('<').append(r.getLocalName()).append('>'); 677 depth--; 678 } else if (et == CHARACTERS) { 679 characters = r.getText(); 680 } 681 et = r.next(); 682 } 683 684 String s = trim(sb.toString()); 685 returnStringBuilder(sb); 686 return s; 687 } 688 689 /** 690 * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag. 691 * 692 * <p> 693 * Precondition: Pointing to opening tag. 694 * Postcondition: Pointing to closing tag. 695 * 696 * @param r The stream being read from. 697 * @return The parsed string. 698 * @throws XMLStreamException Thrown by underlying XML stream. 699 * @throws ParseException Malformed input encountered. 700 */ 701 @Override /* XmlParserSession */ 702 protected final String getElementText(XmlReader r) throws IOException, XMLStreamException, ParseException { 703 r.next(); 704 return parseText(r); 705 } 706 707 @Override /* XmlParserSession */ 708 protected final boolean isWhitespaceElement(XmlReader r) { 709 String s = r.getLocalName(); 710 return whitespaceElements.contains(s); 711 } 712 713 @Override /* XmlParserSession */ 714 protected final String parseWhitespaceElement(XmlReader r) throws IOException, ParseException, XMLStreamException { 715 716 HtmlTag tag = HtmlTag.forEvent(this, r); 717 int et = r.next(); 718 if (tag == BR) { 719 return "\n"; 720 } else if (tag == BS) { 721 return "\b"; 722 } else if (tag == FF) { 723 return "\f"; 724 } else if (tag == SP) { 725 if (et == CHARACTERS) { 726 String s = r.getText(); 727 if (s.charAt(0) == '\u2003') 728 s = "\t"; 729 r.next(); 730 return decodeString(s); 731 } 732 return ""; 733 } else { 734 throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag); 735 } 736 } 737 738 //----------------------------------------------------------------------------------------------------------------- 739 // Other methods 740 //----------------------------------------------------------------------------------------------------------------- 741 742 @Override /* Session */ 743 public ObjectMap toMap() { 744 return super.toMap() 745 .append("HtmlParserSession", new DefaultFilteringObjectMap() 746 ); 747 } 748}