001// *************************************************************************************************************************** 002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file * 003// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * 004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance * 005// * with the License. You may obtain a copy of the License at * 006// * * 007// * http://www.apache.org/licenses/LICENSE-2.0 * 008// * * 009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an * 010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * 011// * specific language governing permissions and limitations under the License. * 012// *************************************************************************************************************************** 013package org.apache.juneau.html; 014 015import static javax.xml.stream.XMLStreamConstants.*; 016import static org.apache.juneau.html.HtmlTag.*; 017import static org.apache.juneau.internal.StringUtils.*; 018 019import java.io.IOException; 020import java.lang.reflect.*; 021import java.util.*; 022 023import javax.xml.stream.*; 024 025import org.apache.juneau.*; 026import org.apache.juneau.html.annotation.*; 027import org.apache.juneau.parser.*; 028import org.apache.juneau.transform.*; 029import org.apache.juneau.xml.*; 030 031/** 032 * Session object that lives for the duration of a single use of {@link HtmlParser}. 033 * 034 * <p> 035 * This class is NOT thread safe. 036 * It is typically discarded after one-time use although it can be reused against multiple inputs. 037 */ 038@SuppressWarnings({ "unchecked", "rawtypes" }) 039public final class HtmlParserSession extends XmlParserSession { 040 041 private static final Set<String> whitespaceElements = new HashSet<>( 042 Arrays.asList( 043 new String[]{"br","bs","sp","ff"} 044 ) 045 ); 046 047 /** 048 * Create a new session using properties specified in the context. 049 * 050 * @param ctx 051 * The context creating this session object. 052 * The context contains all the configuration settings for this object. 053 * @param args 054 * Runtime session arguments. 055 */ 056 protected HtmlParserSession(HtmlParser ctx, ParserSessionArgs args) { 057 super(ctx, args); 058 } 059 060 @Override /* ParserSession */ 061 protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException { 062 try { 063 return parseAnything(type, getXmlReader(pipe), getOuter(), true, null); 064 } catch (XMLStreamException e) { 065 throw new ParseException(e); 066 } 067 } 068 069 @Override /* ReaderParserSession */ 070 protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType) 071 throws Exception { 072 return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType), 073 (ClassMeta<V>)getClassMeta(valueType), null); 074 } 075 076 @Override /* ReaderParserSession */ 077 protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType) 078 throws Exception { 079 return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null); 080 } 081 082 /* 083 * Reads anything starting at the current event. 084 * <p> 085 * Precondition: Must be pointing at outer START_ELEMENT. 086 * Postcondition: Pointing at outer END_ELEMENT. 087 */ 088 private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 089 090 if (eType == null) 091 eType = (ClassMeta<T>)object(); 092 PojoSwap<T,Object> swap = (PojoSwap<T,Object>)eType.getPojoSwap(this); 093 BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this); 094 ClassMeta<?> sType = null; 095 if (builder != null) 096 sType = builder.getBuilderClassMeta(this); 097 else if (swap != null) 098 sType = swap.getSwapClassMeta(this); 099 else 100 sType = eType; 101 setCurrentClass(sType); 102 103 int event = r.getEventType(); 104 if (event != START_ELEMENT) 105 throw new ParseException(this, "parseAnything must be called on outer start element."); 106 107 if (! isRoot) 108 event = r.next(); 109 boolean isEmpty = (event == END_ELEMENT); 110 111 // Skip until we find a start element, end document, or non-empty text. 112 if (! isEmpty) 113 event = skipWs(r); 114 115 if (event == END_DOCUMENT) 116 throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType); 117 118 // Handle @Html(asXml=true) beans. 119 HtmlClassMeta hcm = sType.getExtendedMeta(HtmlClassMeta.class); 120 if (hcm.getFormat() == HtmlFormat.XML) 121 return super.parseAnything(eType, null, r, outer, false, pMeta); 122 123 Object o = null; 124 125 boolean isValid = true; 126 HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false)); 127 128 // If it's not a known tag, then parse it as XML. 129 // Allows us to parse stuff like "<div/>" into HTML5 beans. 130 if (tag == null && event != CHARACTERS) 131 return super.parseAnything(eType, null, r, outer, false, pMeta); 132 133 if (tag == HTML) 134 tag = skipToData(r); 135 136 if (isEmpty) { 137 o = ""; 138 } else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) { 139 String text = parseText(r); 140 if (sType.isObject() || sType.isCharSequence()) 141 o = text; 142 else if (sType.isChar()) 143 o = parseCharacter(text); 144 else if (sType.isBoolean()) 145 o = Boolean.parseBoolean(text); 146 else if (sType.isNumber()) 147 o = parseNumber(text, (Class<? extends Number>)eType.getInnerClass()); 148 else if (sType.canCreateNewInstanceFromString(outer)) 149 o = sType.newInstanceFromString(outer, text); 150 else 151 isValid = false; 152 153 } else if (tag == STRING || (tag == A && pMeta != null 154 && pMeta.getExtendedMeta(HtmlBeanPropertyMeta.class).getLink() != null)) { 155 String text = getElementText(r); 156 if (sType.isObject() || sType.isCharSequence()) 157 o = text; 158 else if (sType.isChar()) 159 o = parseCharacter(text); 160 else if (sType.canCreateNewInstanceFromString(outer)) 161 o = sType.newInstanceFromString(outer, text); 162 else 163 isValid = false; 164 skipTag(r, tag == STRING ? xSTRING : xA); 165 166 } else if (tag == NUMBER) { 167 String text = getElementText(r); 168 if (sType.isObject()) 169 o = parseNumber(text, Number.class); 170 else if (sType.isNumber()) 171 o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass()); 172 else 173 isValid = false; 174 skipTag(r, xNUMBER); 175 176 } else if (tag == BOOLEAN) { 177 String text = getElementText(r); 178 if (sType.isObject() || sType.isBoolean()) 179 o = Boolean.parseBoolean(text); 180 else 181 isValid = false; 182 skipTag(r, xBOOLEAN); 183 184 } else if (tag == P) { 185 String text = getElementText(r); 186 if (! "No Results".equals(text)) 187 isValid = false; 188 skipTag(r, xP); 189 190 } else if (tag == NULL) { 191 skipTag(r, NULL); 192 skipTag(r, xNULL); 193 194 } else if (tag == A) { 195 o = parseAnchor(r, eType); 196 skipTag(r, xA); 197 198 } else if (tag == TABLE) { 199 200 String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object"); 201 ClassMeta cm = getClassMeta(typeName, pMeta, eType); 202 203 if (cm != null) { 204 sType = eType = cm; 205 typeName = sType.isCollectionOrArray() ? "array" : "object"; 206 } else if (! "array".equals(typeName)) { 207 // Type name could be a subtype name. 208 typeName = sType.isCollectionOrArray() ? "array" : "object"; 209 } 210 211 if (typeName.equals("object")) { 212 if (sType.isObject()) { 213 o = parseIntoMap(r, (Map)new ObjectMap(this), sType.getKeyType(), sType.getValueType(), 214 pMeta); 215 } else if (sType.isMap()) { 216 o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer) 217 : new ObjectMap(this)), sType.getKeyType(), sType.getValueType(), pMeta); 218 } else if (builder != null) { 219 BeanMap m = toBeanMap(builder.create(this, eType)); 220 o = builder.build(this, parseIntoBean(r, m).getBean(), eType); 221 } else if (sType.canCreateNewBean(outer)) { 222 BeanMap m = newBeanMap(outer, sType.getInnerClass()); 223 o = parseIntoBean(r, m).getBean(); 224 } else { 225 isValid = false; 226 } 227 skipTag(r, xTABLE); 228 229 } else if (typeName.equals("array")) { 230 if (sType.isObject()) 231 o = parseTableIntoCollection(r, (Collection)new ObjectList(this), sType, pMeta); 232 else if (sType.isCollection()) 233 o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) 234 ? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta); 235 else if (sType.isArray() || sType.isArgs()) { 236 ArrayList l = (ArrayList)parseTableIntoCollection(r, new ArrayList(), sType, pMeta); 237 o = toArray(sType, l); 238 } 239 else 240 isValid = false; 241 skipTag(r, xTABLE); 242 243 } else { 244 isValid = false; 245 } 246 247 } else if (tag == UL) { 248 String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array"); 249 ClassMeta cm = getClassMeta(typeName, pMeta, eType); 250 if (cm != null) 251 sType = eType = cm; 252 253 if (sType.isObject()) 254 o = parseIntoCollection(r, new ObjectList(this), sType, pMeta); 255 else if (sType.isCollection() || sType.isObject()) 256 o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) 257 ? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta); 258 else if (sType.isArray() || sType.isArgs()) 259 o = toArray(sType, parseIntoCollection(r, new ArrayList(), sType, pMeta)); 260 else 261 isValid = false; 262 skipTag(r, xUL); 263 264 } 265 266 if (! isValid) 267 throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType); 268 269 if (swap != null && o != null) 270 o = unswap(swap, o, eType); 271 272 if (outer != null) 273 setParent(eType, o, outer); 274 275 skipWs(r); 276 return (T)o; 277 } 278 279 /* 280 * For parsing output from HtmlDocSerializer, this skips over the head, title, and links. 281 */ 282 private HtmlTag skipToData(XmlReader r) throws ParseException, XMLStreamException { 283 while (true) { 284 int event = r.next(); 285 if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) { 286 r.nextTag(); 287 event = r.getEventType(); 288 boolean isEmpty = (event == END_ELEMENT); 289 // Skip until we find a start element, end document, or non-empty text. 290 if (! isEmpty) 291 event = skipWs(r); 292 if (event == END_DOCUMENT) 293 throw new ParseException(this, "Unexpected end of stream looking for data."); 294 return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false)); 295 } 296 } 297 } 298 299 private static String getAttribute(XmlReader r, String name, String def) { 300 for (int i = 0; i < r.getAttributeCount(); i++) 301 if (r.getAttributeLocalName(i).equals(name)) 302 return r.getAttributeValue(i); 303 return def; 304 } 305 306 /* 307 * Reads an anchor tag and converts it into a bean. 308 */ 309 private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType) 310 throws IOException, ParseException, XMLStreamException { 311 String href = r.getAttributeValue(null, "href"); 312 String name = getElementText(r); 313 Class<T> beanClass = beanType.getInnerClass(); 314 if (beanClass.isAnnotationPresent(HtmlLink.class)) { 315 HtmlLink h = beanClass.getAnnotation(HtmlLink.class); 316 BeanMap<T> m = newBeanMap(beanClass); 317 m.put(h.uriProperty(), href); 318 m.put(h.nameProperty(), name); 319 return m.getBean(); 320 } 321 return convertToType(href, beanType); 322 } 323 324 private static Map<String,String> getAttributes(XmlReader r) { 325 Map<String,String> m = new TreeMap<>() ; 326 for (int i = 0; i < r.getAttributeCount(); i++) 327 m.put(r.getAttributeLocalName(i), r.getAttributeValue(i)); 328 return m; 329 } 330 331 /* 332 * Reads contents of <table> element. 333 * Precondition: Must be pointing at <table> event. 334 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 335 */ 336 private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType, 337 ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 338 while (true) { 339 HtmlTag tag = nextTag(r, TR, xTABLE); 340 if (tag == xTABLE) 341 break; 342 tag = nextTag(r, TD, TH); 343 // Skip over the column headers. 344 if (tag == TH) { 345 skipTag(r); 346 r.nextTag(); 347 skipTag(r); 348 } else { 349 K key = parseAnything(keyType, r, m, false, pMeta); 350 nextTag(r, TD); 351 V value = parseAnything(valueType, r, m, false, pMeta); 352 setName(valueType, value, key); 353 m.put(key, value); 354 } 355 nextTag(r, xTR); 356 } 357 358 return m; 359 } 360 361 /* 362 * Reads contents of <ul> element. 363 * Precondition: Must be pointing at event following <ul> event. 364 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 365 */ 366 private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l, 367 ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 368 int argIndex = 0; 369 while (true) { 370 HtmlTag tag = nextTag(r, LI, xUL); 371 if (tag == xUL) 372 break; 373 ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType(); 374 l.add((E)parseAnything(elementType, r, l, false, pMeta)); 375 } 376 return l; 377 } 378 379 /* 380 * Reads contents of <ul> element. 381 * Precondition: Must be pointing at event following <ul> event. 382 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 383 */ 384 private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l, 385 ClassMeta<E> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 386 387 HtmlTag tag = nextTag(r, TR); 388 List<String> keys = new ArrayList<>(); 389 while (true) { 390 tag = nextTag(r, TH, xTR); 391 if (tag == xTR) 392 break; 393 keys.add(getElementText(r)); 394 } 395 396 int argIndex = 0; 397 398 while (true) { 399 r.nextTag(); 400 tag = HtmlTag.forEvent(this, r); 401 if (tag == xTABLE) 402 break; 403 404 ClassMeta elementType = null; 405 String beanType = getAttribute(r, getBeanTypePropertyName(type), null); 406 if (beanType != null) 407 elementType = getClassMeta(beanType, pMeta, null); 408 if (elementType == null) 409 elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType(); 410 if (elementType == null) 411 elementType = object(); 412 413 BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this); 414 415 if (builder != null || elementType.canCreateNewBean(l)) { 416 BeanMap m = 417 builder != null 418 ? toBeanMap(builder.create(this, elementType)) 419 : newBeanMap(l, elementType.getInnerClass()) 420 ; 421 for (int i = 0; i < keys.size(); i++) { 422 tag = nextTag(r, TD, NULL); 423 if (tag == NULL) { 424 m = null; 425 nextTag(r, xNULL); 426 break; 427 } 428 String key = keys.get(i); 429 BeanMapEntry e = m.getProperty(key); 430 if (e == null) { 431 //onUnknownProperty(key, m, -1, -1); 432 parseAnything(object(), r, l, false, null); 433 } else { 434 BeanPropertyMeta bpm = e.getMeta(); 435 ClassMeta<?> cm = bpm.getClassMeta(); 436 Object value = parseAnything(cm, r, m.getBean(false), false, bpm); 437 setName(cm, value, key); 438 bpm.set(m, key, value); 439 } 440 } 441 l.add( 442 m == null 443 ? null 444 : builder != null 445 ? builder.build(this, m.getBean(), elementType) 446 : (E)m.getBean() 447 ); 448 } else { 449 String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType())); 450 Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l) 451 : new ObjectMap(this)); 452 for (int i = 0; i < keys.size(); i++) { 453 tag = nextTag(r, TD, NULL); 454 if (tag == NULL) { 455 m = null; 456 nextTag(r, xNULL); 457 break; 458 } 459 String key = keys.get(i); 460 if (m != null) { 461 ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType(); 462 Object value = parseAnything(vt, r, l, false, pMeta); 463 setName(vt, value, key); 464 m.put(convertToType(key, kt), value); 465 } 466 } 467 if (m != null && c != null) { 468 ObjectMap m2 = (m instanceof ObjectMap ? (ObjectMap)m : new ObjectMap(m).setBeanSession(this)); 469 m2.put(getBeanTypePropertyName(type.getElementType()), c); 470 l.add((E)cast(m2, pMeta, elementType)); 471 } else { 472 l.add((E)m); 473 } 474 } 475 nextTag(r, xTR); 476 } 477 return l; 478 } 479 480 /* 481 * Reads contents of <table> element. 482 * Precondition: Must be pointing at event following <table> event. 483 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 484 */ 485 private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws IOException, ParseException, ExecutableException, XMLStreamException { 486 while (true) { 487 HtmlTag tag = nextTag(r, TR, xTABLE); 488 if (tag == xTABLE) 489 break; 490 tag = nextTag(r, TD, TH); 491 // Skip over the column headers. 492 if (tag == TH) { 493 skipTag(r); 494 r.nextTag(); 495 skipTag(r); 496 } else { 497 String key = getElementText(r); 498 nextTag(r, TD); 499 BeanPropertyMeta pMeta = m.getPropertyMeta(key); 500 if (pMeta == null) { 501 onUnknownProperty(key, m); 502 parseAnything(object(), r, null, false, null); 503 } else { 504 ClassMeta<?> cm = pMeta.getClassMeta(); 505 Object value = parseAnything(cm, r, m.getBean(false), false, pMeta); 506 setName(cm, value, key); 507 pMeta.set(m, key, value); 508 } 509 } 510 nextTag(r, xTR); 511 } 512 return m; 513 } 514 515 /* 516 * Reads the next tag. Advances past anything that's not a start or end tag. Throws an exception if 517 * it's not one of the expected tags. 518 * Precondition: Must be pointing before the event we want to parse. 519 * Postcondition: Pointing at the tag just parsed. 520 */ 521 private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException { 522 int et = r.next(); 523 524 while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT) 525 et = r.next(); 526 527 if (et == END_DOCUMENT) 528 throw new ParseException(this, "Unexpected end of document."); 529 530 HtmlTag tag = HtmlTag.forEvent(this, r); 531 if (expected.length == 0) 532 return tag; 533 for (HtmlTag t : expected) 534 if (t == tag) 535 return tag; 536 537 throw new ParseException(this, "Unexpected tag: ''{0}''. Expected one of the following: {1}", tag, expected); 538 } 539 540 /* 541 * Skips over the current element and advances to the next element. 542 * <p> 543 * Precondition: Pointing to opening tag. 544 * Postcondition: Pointing to next opening tag. 545 * 546 * @param r The stream being read from. 547 * @throws XMLStreamException 548 */ 549 private void skipTag(XmlReader r) throws ParseException, XMLStreamException { 550 int et = r.getEventType(); 551 552 if (et != START_ELEMENT) 553 throw new ParseException(this, 554 "skipToNextTag() call on invalid event ''{0}''. Must only be called on START_ELEMENT events.", 555 XmlUtils.toReadableEvent(r) 556 ); 557 558 String n = r.getLocalName(); 559 560 int depth = 0; 561 while (true) { 562 et = r.next(); 563 if (et == START_ELEMENT) { 564 String n2 = r.getLocalName(); 565 if (n.equals(n2)) 566 depth++; 567 } else if (et == END_ELEMENT) { 568 String n2 = r.getLocalName(); 569 if (n.equals(n2)) 570 depth--; 571 if (depth < 0) 572 return; 573 } 574 } 575 } 576 577 private void skipTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException { 578 HtmlTag tag = HtmlTag.forEvent(this, r); 579 if (tag.isOneOf(expected)) 580 r.next(); 581 else 582 throw new ParseException(this, 583 "Unexpected tag: ''{0}''. Expected one of the following: {1}", 584 tag, expected); 585 } 586 587 private static int skipWs(XmlReader r) throws XMLStreamException { 588 int event = r.getEventType(); 589 while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace()) 590 event = r.next(); 591 return event; 592 } 593 594 /** 595 * Parses CHARACTERS data. 596 * 597 * <p> 598 * Precondition: Pointing to event immediately following opening tag. 599 * Postcondition: Pointing to closing tag. 600 * 601 * @param r The stream being read from. 602 * @return The parsed string. 603 * @throws XMLStreamException Thrown by underlying XML stream. 604 */ 605 @Override /* XmlParserSession */ 606 protected final String parseText(XmlReader r) throws IOException, ParseException, XMLStreamException { 607 608 StringBuilder sb = getStringBuilder(); 609 610 int et = r.getEventType(); 611 if (et == END_ELEMENT) 612 return ""; 613 614 int depth = 0; 615 616 String characters = null; 617 618 while (true) { 619 if (et == START_ELEMENT) { 620 if (characters != null) { 621 if (sb.length() == 0) 622 characters = trimStart(characters); 623 sb.append(characters); 624 characters = null; 625 } 626 HtmlTag tag = HtmlTag.forEvent(this, r); 627 if (tag == BR) { 628 sb.append('\n'); 629 r.nextTag(); 630 } else if (tag == BS) { 631 sb.append('\b'); 632 r.nextTag(); 633 } else if (tag == SP) { 634 et = r.next(); 635 if (et == CHARACTERS) { 636 String s = r.getText(); 637 if (s.length() > 0) { 638 char c = r.getText().charAt(0); 639 if (c == '\u2003') 640 c = '\t'; 641 sb.append(c); 642 } 643 r.nextTag(); 644 } 645 } else if (tag == FF) { 646 sb.append('\f'); 647 r.nextTag(); 648 } else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) { 649 et = r.next(); 650 if (et == CHARACTERS) { 651 sb.append(r.getText()); 652 r.nextTag(); 653 } 654 } else { 655 sb.append('<').append(r.getLocalName()); 656 for (int i = 0; i < r.getAttributeCount(); i++) 657 sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\''); 658 sb.append('>'); 659 depth++; 660 } 661 } else if (et == END_ELEMENT) { 662 if (characters != null) { 663 if (sb.length() == 0) 664 characters = trimStart(characters); 665 if (depth == 0) 666 characters = trimEnd(characters); 667 sb.append(characters); 668 characters = null; 669 } 670 if (depth == 0) 671 break; 672 sb.append('<').append(r.getLocalName()).append('>'); 673 depth--; 674 } else if (et == CHARACTERS) { 675 characters = r.getText(); 676 } 677 et = r.next(); 678 } 679 680 String s = trim(sb.toString()); 681 returnStringBuilder(sb); 682 return s; 683 } 684 685 /** 686 * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag. 687 * 688 * <p> 689 * Precondition: Pointing to opening tag. 690 * Postcondition: Pointing to closing tag. 691 * 692 * @param r The stream being read from. 693 * @return The parsed string. 694 * @throws XMLStreamException Thrown by underlying XML stream. 695 * @throws ParseException Malformed input encountered. 696 */ 697 @Override /* XmlParserSession */ 698 protected final String getElementText(XmlReader r) throws IOException, XMLStreamException, ParseException { 699 r.next(); 700 return parseText(r); 701 } 702 703 @Override /* XmlParserSession */ 704 protected final boolean isWhitespaceElement(XmlReader r) { 705 String s = r.getLocalName(); 706 return whitespaceElements.contains(s); 707 } 708 709 @Override /* XmlParserSession */ 710 protected final String parseWhitespaceElement(XmlReader r) throws IOException, ParseException, XMLStreamException { 711 712 HtmlTag tag = HtmlTag.forEvent(this, r); 713 int et = r.next(); 714 if (tag == BR) { 715 return "\n"; 716 } else if (tag == BS) { 717 return "\b"; 718 } else if (tag == FF) { 719 return "\f"; 720 } else if (tag == SP) { 721 if (et == CHARACTERS) { 722 String s = r.getText(); 723 if (s.charAt(0) == '\u2003') 724 s = "\t"; 725 r.next(); 726 return decodeString(s); 727 } 728 return ""; 729 } else { 730 throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag); 731 } 732 } 733 734 //----------------------------------------------------------------------------------------------------------------- 735 // Other methods 736 //----------------------------------------------------------------------------------------------------------------- 737 738 @Override /* Session */ 739 public ObjectMap toMap() { 740 return super.toMap() 741 .append("HtmlParserSession", new DefaultFilteringObjectMap() 742 ); 743 } 744}