// ***************************************************************************************************************************
// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
// * with the License.  You may obtain a copy of the License at                                                              *
// *                                                                                                                         *
// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
// *                                                                                                                         *
// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
// * specific language governing permissions and limitations under the License.                                              *
// ***************************************************************************************************************************
package org.apache.juneau.html;

import static javax.xml.stream.XMLStreamConstants.*;
import static org.apache.juneau.html.HtmlTag.*;
import static org.apache.juneau.internal.StringUtils.*;

import java.lang.reflect.*;
import java.util.*;

import javax.xml.stream.*;

import org.apache.juneau.*;
import org.apache.juneau.html.annotation.*;
import org.apache.juneau.parser.*;
import org.apache.juneau.transform.*;
import org.apache.juneau.xml.*;

/**
 * Session object that lives for the duration of a single use of {@link HtmlParser}.
 *
 * <p>
 * This class is NOT thread safe.
 * It is typically discarded after one-time use although it can be reused against multiple inputs.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public final class HtmlParserSession extends XmlParserSession {

	/** Local names of the elements that encode whitespace characters (see {@link #parseWhitespaceElement(XmlReader)}). */
	private static final Set<String> whitespaceElements = new HashSet<>(Arrays.asList("br", "bs", "sp", "ff"));

	/**
	 * Create a new session using properties specified in the context.
	 *
	 * @param ctx
	 * 	The context creating this session object.
	 * 	The context contains all the configuration settings for this object.
	 * @param args
	 * 	Runtime session arguments.
	 */
	protected HtmlParserSession(HtmlParser ctx, ParserSessionArgs args) {
		super(ctx, args);
	}

	@Override /* ParserSession */
	protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws Exception {
		return parseAnything(type, getXmlReader(pipe), getOuter(), true, null);
	}

	@Override /* ReaderParserSession */
	protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType)
			throws Exception {
		return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType),
			(ClassMeta<V>)getClassMeta(valueType), null);
	}

	@Override /* ReaderParserSession */
	protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType)
			throws Exception {
		return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null);
	}

	/*
	 * Reads anything starting at the current event.
	 * <p>
	 * Precondition: Must be pointing at outer START_ELEMENT.
	 * Postcondition: Pointing at outer END_ELEMENT.
	 *
	 * eType is the type expected by the caller (defaults to object() when null); sType is the type actually
	 * read off the stream: the builder class if the type has a builder swap, else the swap class if it has a
	 * POJO swap, else eType itself.
	 * When isRoot is false the reader is advanced one event before dispatching on the tag.
	 */
	private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws Exception {

		if (eType == null)
			eType = (ClassMeta<T>)object();
		PojoSwap<T,Object> swap = (PojoSwap<T,Object>)eType.getPojoSwap(this);
		BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this);
		ClassMeta<?> sType = null;
		if (builder != null)
			sType = builder.getBuilderClassMeta(this);
		else if (swap != null)
			sType = swap.getSwapClassMeta(this);
		else
			sType = eType;
		setCurrentClass(sType);

		int event = r.getEventType();
		if (event != START_ELEMENT)
			throw new XmlParseException(r.getLocation(), "parseAnything must be called on outer start element.");

		if (! isRoot)
			event = r.next();
		boolean isEmpty = (event == END_ELEMENT);

		// Skip until we find a start element, end document, or non-empty text.
		if (! isEmpty)
			event = skipWs(r);

		if (event == END_DOCUMENT)
			throw new XmlParseException(r.getLocation(), "Unexpected end of stream in parseAnything for type ''{0}''", eType);

		// Handle @Html(asXml=true) beans.
		HtmlClassMeta hcm = sType.getExtendedMeta(HtmlClassMeta.class);
		if (hcm.isAsXml())
			return super.parseAnything(eType, null, r, outer, false, pMeta);

		Object o = null;

		boolean isValid = true;
		HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));

		// If it's not a known tag, then parse it as XML.
		// Allows us to parse stuff like "<div/>" into HTML5 beans.
		if (tag == null && event != CHARACTERS)
			return super.parseAnything(eType, null, r, outer, false, pMeta);

		if (tag == HTML)
			tag = skipToData(r);

		if (isEmpty) {
			o = "";
		} else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) {
			// Raw character data (possibly starting with a whitespace-encoding element).
			String text = parseText(r);
			if (sType.isObject() || sType.isCharSequence())
				o = text;
			else if (sType.isChar())
				o = text.charAt(0);
			else if (sType.isBoolean())
				o = Boolean.parseBoolean(text);
			else if (sType.isNumber())
				// The guard tests sType, so the numeric class must come from sType as well
				// (same as the <number> branch below); eType may be the un-swapped, non-numeric type.
				o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass());
			else if (sType.canCreateNewInstanceFromString(outer))
				o = sType.newInstanceFromString(outer, text);
			else if (sType.canCreateNewInstanceFromNumber(outer))
				o = sType.newInstanceFromNumber(this, outer, parseNumber(text, sType.getNewInstanceFromNumberClass()));
			else
				isValid = false;

		} else if (tag == STRING || (tag == A && pMeta != null
				&& pMeta.getExtendedMeta(HtmlBeanPropertyMeta.class).getLink() != null)) {
			// <string> element, or an <a> on a property whose HTML metadata defines a link: use the element text.
			String text = getElementText(r);
			if (sType.isObject() || sType.isCharSequence())
				o = text;
			else if (sType.isChar())
				o = text.charAt(0);
			else if (sType.canCreateNewInstanceFromString(outer))
				o = sType.newInstanceFromString(outer, text);
			else if (sType.canCreateNewInstanceFromNumber(outer))
				o = sType.newInstanceFromNumber(this, outer, parseNumber(text, sType.getNewInstanceFromNumberClass()));
			else
				isValid = false;
			skipTag(r, tag == STRING ? xSTRING : xA);

		} else if (tag == NUMBER) {
			String text = getElementText(r);
			if (sType.isObject())
				o = parseNumber(text, Number.class);
			else if (sType.isNumber())
				o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass());
			else if (sType.canCreateNewInstanceFromNumber(outer))
				o = sType.newInstanceFromNumber(this, outer, parseNumber(text, sType.getNewInstanceFromNumberClass()));
			else
				isValid = false;
			skipTag(r, xNUMBER);

		} else if (tag == BOOLEAN) {
			String text = getElementText(r);
			if (sType.isObject() || sType.isBoolean())
				o = Boolean.parseBoolean(text);
			else
				isValid = false;
			skipTag(r, xBOOLEAN);

		} else if (tag == P) {
			// The only paragraph accepted here is the literal "No Results" marker; it yields null.
			String text = getElementText(r);
			if (! "No Results".equals(text))
				isValid = false;
			skipTag(r, xP);

		} else if (tag == NULL) {
			skipTag(r, NULL);
			skipTag(r, xNULL);

		} else if (tag == A) {
			o = parseAnchor(r, eType);
			skipTag(r, xA);

		} else if (tag == TABLE) {

			// The bean-type attribute on <table> is either a registered type name or the literal "object"/"array".
			String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object");
			ClassMeta cm = getClassMeta(typeName, pMeta, eType);

			if (cm != null) {
				sType = eType = cm;
				typeName = sType.isCollectionOrArray() ? "array" : "object";
			} else if (! "array".equals(typeName)) {
				// Type name could be a subtype name.
				typeName = sType.isCollectionOrArray() ? "array" : "object";
			}

			if (typeName.equals("object")) {
				if (sType.isObject()) {
					o = parseIntoMap(r, (Map)new ObjectMap(this), sType.getKeyType(), sType.getValueType(),
						pMeta);
				} else if (sType.isMap()) {
					o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer)
						: new ObjectMap(this)), sType.getKeyType(), sType.getValueType(), pMeta);
				} else if (builder != null) {
					BeanMap m = toBeanMap(builder.create(this, eType));
					o = builder.build(this, parseIntoBean(r, m).getBean(), eType);
				} else if (sType.canCreateNewBean(outer)) {
					BeanMap m = newBeanMap(outer, sType.getInnerClass());
					o = parseIntoBean(r, m).getBean();
				} else {
					isValid = false;
				}
				skipTag(r, xTABLE);

			} else if (typeName.equals("array")) {
				if (sType.isObject())
					o = parseTableIntoCollection(r, (Collection)new ObjectList(this), sType, pMeta);
				else if (sType.isCollection())
					o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
						? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta);
				else if (sType.isArray() || sType.isArgs()) {
					ArrayList l = (ArrayList)parseTableIntoCollection(r, new ArrayList(), sType, pMeta);
					o = toArray(sType, l);
				}
				else
					isValid = false;
				skipTag(r, xTABLE);

			} else {
				isValid = false;
			}

		} else if (tag == UL) {
			String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array");
			ClassMeta cm = getClassMeta(typeName, pMeta, eType);
			if (cm != null)
				sType = eType = cm;

			if (sType.isObject())
				o = parseIntoCollection(r, new ObjectList(this), sType, pMeta);
			else if (sType.isCollection())
				o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
					? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta);
			else if (sType.isArray() || sType.isArgs())
				o = toArray(sType, parseIntoCollection(r, new ArrayList(), sType, pMeta));
			else
				isValid = false;
			skipTag(r, xUL);

		}

		if (! isValid)
			throw new XmlParseException(r.getLocation(), "Unexpected tag ''{0}'' for type ''{1}''", tag, eType);

		if (swap != null && o != null)
			o = swap.unswap(this, o, eType);

		if (outer != null)
			setParent(eType, o, outer);

		skipWs(r);
		return (T)o;
	}

	/*
	 * For parsing output from HtmlDocSerializer, this skips over the head, title, and links.
	 *
	 * Advances until a <div id='data'> start element is found, then returns the tag of the first
	 * non-whitespace event inside it (null if that event is character data).
	 * There is no explicit end-of-document check inside the scan loop; termination when no such div exists
	 * relies on r.next() failing at end of input.
	 */
	private static HtmlTag skipToData(XmlReader r) throws Exception {
		while (true) {
			int event = r.next();
			if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) {
				r.nextTag();
				event = r.getEventType();
				boolean isEmpty = (event == END_ELEMENT);
				// Skip until we find a start element, end document, or non-empty text.
				if (! isEmpty)
					event = skipWs(r);
				if (event == END_DOCUMENT)
					throw new XmlParseException(r.getLocation(), "Unexpected end of stream looking for data.");
				return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
			}
		}
	}

	/*
	 * Returns the value of the first attribute on the current element whose local name equals 'name',
	 * or 'def' if there is no such attribute.
	 */
	private static String getAttribute(XmlReader r, String name, String def) {
		for (int i = 0; i < r.getAttributeCount(); i++)
			if (r.getAttributeLocalName(i).equals(name))
				return r.getAttributeValue(i);
		return def;
	}

	/*
	 * Reads an anchor tag and converts it into a bean.
	 *
	 * If the bean class carries @HtmlLink, a new bean is populated with the href attribute and the element text
	 * using the property names given by the annotation.  Otherwise the href alone is converted to the target type.
	 */
	private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType)
			throws Exception {
		String href = r.getAttributeValue(null, "href");
		String name = getElementText(r);
		Class<T> beanClass = beanType.getInnerClass();
		if (beanClass.isAnnotationPresent(HtmlLink.class)) {
			HtmlLink h = beanClass.getAnnotation(HtmlLink.class);
			BeanMap<T> m = newBeanMap(beanClass);
			m.put(h.hrefProperty(), href);
			m.put(h.nameProperty(), name);
			return m.getBean();
		}
		return convertToType(href, beanType);
	}

	/*
	 * Returns all attributes of the current element as a sorted map of local name to value.
	 */
	private static Map<String,String> getAttributes(XmlReader r) {
		Map<String,String> m = new TreeMap<>();
		for (int i = 0; i < r.getAttributeCount(); i++)
			m.put(r.getAttributeLocalName(i), r.getAttributeValue(i));
		return m;
	}

	/*
	 * Reads contents of <table> element.
	 * Precondition: Must be pointing at <table> event.
	 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event.
	 *
	 * Each <tr> is either a header row (first cell is <th>; both cells are skipped) or a key/value row
	 * of two <td> cells.
	 */
	private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType,
			ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws Exception {
		while (true) {
			HtmlTag tag = nextTag(r, TR, xTABLE);
			if (tag == xTABLE)
				break;
			tag = nextTag(r, TD, TH);
			// Skip over the column headers.
			if (tag == TH) {
				skipTag(r);
				r.nextTag();
				skipTag(r);
			} else {
				K key = parseAnything(keyType, r, m, false, pMeta);
				nextTag(r, TD);
				V value = parseAnything(valueType, r, m, false, pMeta);
				setName(valueType, value, key);
				m.put(key, value);
			}
			nextTag(r, xTR);
		}

		return m;
	}

	/*
	 * Reads contents of <ul> element.
	 * Precondition: Must be pointing at event following <ul> event.
	 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event.
	 *
	 * For "args" types each <li> is parsed with the type of the argument at the matching position;
	 * otherwise every <li> is parsed with the collection's element type.
	 */
	private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l,
			ClassMeta<?> type, BeanPropertyMeta pMeta) throws Exception {
		int argIndex = 0;
		while (true) {
			HtmlTag tag = nextTag(r, LI, xUL);
			if (tag == xUL)
				break;
			ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
			l.add((E)parseAnything(elementType, r, l, false, pMeta));
		}
		return l;
	}

	/*
	 * Reads contents of a <table> element that represents a collection: a header row of <th> column names
	 * followed by one <tr> per entry.
	 * Precondition: Must be pointing at event following <table> event.
	 * Postcondition: Pointing at the closing </table> event.
	 *
	 * A row whose first cell position holds a <null> element produces a null entry.
	 */
	private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l,
			ClassMeta<E> type, BeanPropertyMeta pMeta) throws Exception {

		// Header row: collect the column names.
		HtmlTag tag = nextTag(r, TR);
		List<String> keys = new ArrayList<>();
		while (true) {
			tag = nextTag(r, TH, xTR);
			if (tag == xTR)
				break;
			keys.add(getElementText(r));
		}

		int argIndex = 0;

		while (true) {
			r.nextTag();
			tag = HtmlTag.forEvent(r);
			if (tag == xTABLE)
				break;

			// Resolve the row type: explicit bean-type attribute on the row first, then the declared
			// element/arg type, then generic object.
			ClassMeta elementType = null;
			String beanType = getAttribute(r, getBeanTypePropertyName(type), null);
			if (beanType != null)
				elementType = getClassMeta(beanType, pMeta, null);
			if (elementType == null)
				elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
			if (elementType == null)
				elementType = object();

			if (elementType.canCreateNewBean(l)) {
				BeanMap m = newBeanMap(l, elementType.getInnerClass());
				for (int i = 0; i < keys.size(); i++) {
					tag = nextTag(r, TD, NULL);
					if (tag == NULL) {
						m = null;
						nextTag(r, xNULL);
						break;
					}
					String key = keys.get(i);
					BeanMapEntry e = m.getProperty(key);
					if (e == null) {
						// Unknown column: the cell is consumed and discarded without notifying
						// onUnknownProperty (parseIntoBean does notify in the equivalent situation).
						parseAnything(object(), r, l, false, null);
					} else {
						BeanPropertyMeta bpm = e.getMeta();
						ClassMeta<?> cm = bpm.getClassMeta();
						Object value = parseAnything(cm, r, m.getBean(false), false, bpm);
						setName(cm, value, key);
						bpm.set(m, key, value);
					}
				}
				l.add(m == null ? null : (E)m.getBean());
			} else {
				// Must be read before the cells are consumed, while the reader is still on the <tr> element.
				String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType()));
				Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l)
					: new ObjectMap(this));
				for (int i = 0; i < keys.size(); i++) {
					tag = nextTag(r, TD, NULL);
					if (tag == NULL) {
						m = null;
						nextTag(r, xNULL);
						break;
					}
					// m is only set to null immediately before the break above, so it is non-null here.
					String key = keys.get(i);
					ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType();
					Object value = parseAnything(vt, r, l, false, pMeta);
					setName(vt, value, key);
					m.put(convertToType(key, kt), value);
				}
				if (m != null && c != null) {
					// Re-attach the row's bean-type attribute so the map can be cast to the named type.
					ObjectMap m2 = (m instanceof ObjectMap ? (ObjectMap)m : new ObjectMap(m).setBeanSession(this));
					m2.put(getBeanTypePropertyName(type.getElementType()), c);
					l.add((E)cast(m2, pMeta, elementType));
				} else {
					l.add((E)m);
				}
			}
			nextTag(r, xTR);
		}
		return l;
	}

	/*
	 * Reads contents of <table> element.
	 * Precondition: Must be pointing at event following <table> event.
	 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event.
	 *
	 * Each non-header row holds a property name cell followed by a value cell.  Properties not found on the
	 * bean are reported through onUnknownProperty and their value is parsed and discarded.
	 */
	private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws Exception {
		while (true) {
			HtmlTag tag = nextTag(r, TR, xTABLE);
			if (tag == xTABLE)
				break;
			tag = nextTag(r, TD, TH);
			// Skip over the column headers.
			if (tag == TH) {
				skipTag(r);
				r.nextTag();
				skipTag(r);
			} else {
				String key = getElementText(r);
				nextTag(r, TD);
				BeanPropertyMeta pMeta = m.getPropertyMeta(key);
				if (pMeta == null) {
					onUnknownProperty(r.getPipe(), key, m, -1, -1);
					parseAnything(object(), r, null, false, null);
				} else {
					ClassMeta<?> cm = pMeta.getClassMeta();
					Object value = parseAnything(cm, r, m.getBean(false), false, pMeta);
					setName(cm, value, key);
					pMeta.set(m, key, value);
				}
			}
			nextTag(r, xTR);
		}
		return m;
	}

	/*
	 * Reads the next tag.  Advances past anything that's not a start or end tag.  Throws an exception if
	 * it's not one of the expected tags.
	 * Precondition: Must be pointing before the event we want to parse.
	 * Postcondition: Pointing at the tag just parsed.
	 *
	 * When no expected tags are supplied, whatever tag is found is returned.
	 */
	private static HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws Exception {
		int et = r.next();

		while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT)
			et = r.next();

		if (et == END_DOCUMENT)
			throw new XmlParseException(r.getLocation(), "Unexpected end of document.");

		HtmlTag tag = HtmlTag.forEvent(r);
		if (expected.length == 0)
			return tag;
		for (HtmlTag t : expected)
			if (t == tag)
				return tag;

		throw new XmlParseException(r.getLocation(), "Unexpected tag: ''{0}''.  Expected one of the following: {1}", tag, expected);
	}

	/*
	 * Skips over the current element, including any nested elements of the same name.
	 * <p>
	 * Precondition: Pointing to opening tag.
	 * Postcondition: Pointing to the matching closing tag.
	 *
	 * @param r The stream being read from.
	 * @throws Exception If the reader is not positioned on a START_ELEMENT, or the underlying reader fails.
	 */
	private static void skipTag(XmlReader r) throws Exception {
		int et = r.getEventType();

		if (et != START_ELEMENT)
			throw new XmlParseException(
				r.getLocation(),
				"skipToNextTag() call on invalid event ''{0}''.  Must only be called on START_ELEMENT events.",
				XmlUtils.toReadableEvent(r)
			);

		String n = r.getLocalName();

		// Tracks nesting of same-named elements; drops below zero on the end tag matching the starting element.
		int depth = 0;
		while (true) {
			et = r.next();
			if (et == START_ELEMENT) {
				String n2 = r.getLocalName();
				if (n.equals(n2))
					depth++;
			} else if (et == END_ELEMENT) {
				String n2 = r.getLocalName();
				if (n.equals(n2))
					depth--;
				if (depth < 0)
					return;
			}
		}
	}

	/*
	 * Verifies that the current event is one of the expected tags and advances one event past it.
	 * Throws an XmlParseException otherwise.
	 */
	private static void skipTag(XmlReader r, HtmlTag...expected) throws Exception {
		HtmlTag tag = HtmlTag.forEvent(r);
		// NOTE(review): forEvent presumably yields null for unrecognised tag names (forString is null-checked in
		// parseAnything); the guard routes that case to the descriptive parse error rather than a NullPointerException.
		if (tag != null && tag.isOneOf(expected))
			r.next();
		else
			throw new XmlParseException(
				r.getLocation(),
				"Unexpected tag: ''{0}''.  Expected one of the following: {1}",
				tag, expected);
	}

	/*
	 * Advances past whitespace-only events.  Stops at (and returns) the first start element, end element,
	 * end of document, or non-whitespace event.
	 */
	private static int skipWs(XmlReader r) throws XMLStreamException {
		int event = r.getEventType();
		while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace())
			event = r.next();
		return event;
	}

	/**
	 * Parses CHARACTERS data.
	 *
	 * <p>
	 * Precondition: Pointing to event immediately following opening tag.
	 * Postcondition: Pointing to closing tag.
	 *
	 * <p>
	 * Whitespace-encoding elements (<code>br</code>, <code>bs</code>, <code>sp</code>, <code>ff</code>) are
	 * converted to the characters they represent, the text of <code>string</code>/<code>number</code>/<code>boolean</code>
	 * elements is appended as-is, and any other nested element is re-emitted as markup.
	 *
	 * @param r The stream being read from.
	 * @return The parsed string.
	 * @throws Exception If the underlying reader fails.
	 */
	@Override /* XmlParserSession */
	protected final String parseText(XmlReader r) throws Exception {

		int et = r.getEventType();
		if (et == END_ELEMENT)
			return "";

		// Acquired only after the early return above, and always handed back via the finally block,
		// so the session's builder is not left checked out on the empty or failure paths.
		StringBuilder sb = getStringBuilder();
		try {
			// Number of currently open nested elements that are being re-emitted as markup.
			int depth = 0;

			// Most recent CHARACTERS text, held back until the next tag so it can be trimmed in context.
			String characters = null;

			while (true) {
				if (et == START_ELEMENT) {
					if (characters != null) {
						if (sb.length() == 0)
							characters = trimStart(characters);
						sb.append(characters);
						characters = null;
					}
					HtmlTag tag = HtmlTag.forEvent(r);
					if (tag == BR) {
						sb.append('\n');
						r.nextTag();
					} else if (tag == BS) {
						sb.append('\b');
						r.nextTag();
					} else if (tag == SP) {
						et = r.next();
						if (et == CHARACTERS) {
							String s = r.getText();
							if (s.length() > 0) {
								char c = s.charAt(0);
								// An em-space inside <sp> stands for a tab.
								if (c == '\u2003')
									c = '\t';
								sb.append(c);
							}
							r.nextTag();
						}
					} else if (tag == FF) {
						sb.append('\f');
						r.nextTag();
					} else if (tag != null && tag.isOneOf(STRING, NUMBER, BOOLEAN)) {
						et = r.next();
						if (et == CHARACTERS) {
							sb.append(r.getText());
							r.nextTag();
						}
					} else {
						// Any other element: write the start tag back out and keep reading its content.
						sb.append('<').append(r.getLocalName());
						for (int i = 0; i < r.getAttributeCount(); i++)
							sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\'');
						sb.append('>');
						depth++;
					}
				} else if (et == END_ELEMENT) {
					if (characters != null) {
						if (sb.length() == 0)
							characters = trimStart(characters);
						if (depth == 0)
							characters = trimEnd(characters);
						sb.append(characters);
						characters = null;
					}
					if (depth == 0)
						break;
					// Close the nested element with a proper end tag so the re-emitted markup stays balanced.
					sb.append("</").append(r.getLocalName()).append('>');
					depth--;
				} else if (et == CHARACTERS) {
					characters = r.getText();
				}
				et = r.next();
			}

			return trim(sb.toString());
		} finally {
			returnStringBuilder(sb);
		}
	}

	/**
	 * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag.
	 *
	 * <p>
	 * Precondition: Pointing to opening tag.
	 * Postcondition: Pointing to closing tag.
	 *
	 * @param r The stream being read from.
	 * @return The parsed string.
	 * @throws Exception If the underlying reader fails.
	 */
	@Override /* XmlParserSession */
	protected final String getElementText(XmlReader r) throws Exception {
		r.next();
		return parseText(r);
	}

	@Override /* XmlParserSession */
	protected final boolean isWhitespaceElement(XmlReader r) {
		String s = r.getLocalName();
		return whitespaceElements.contains(s);
	}

	@Override /* XmlParserSession */
	protected final String parseWhitespaceElement(XmlReader r) throws Exception {

		HtmlTag tag = HtmlTag.forEvent(r);
		int et = r.next();
		if (tag == BR) {
			return "\n";
		} else if (tag == BS) {
			return "\b";
		} else if (tag == FF) {
			return "\f";
		} else if (tag == SP) {
			if (et == CHARACTERS) {
				String s = r.getText();
				// An em-space inside <sp> stands for a tab.  Guard against empty text, as parseText does.
				if (! s.isEmpty() && s.charAt(0) == '\u2003')
					s = "\t";
				r.next();
				return decodeString(s);
			}
			return "";
		} else {
			throw new XmlParseException(r.getLocation(), "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag);
		}
	}
}