001// *************************************************************************************************************************** 002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file * 003// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * 004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance * 005// * with the License. You may obtain a copy of the License at * 006// * * 007// * http://www.apache.org/licenses/LICENSE-2.0 * 008// * * 009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an * 010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * 011// * specific language governing permissions and limitations under the License. * 012// *************************************************************************************************************************** 013package org.apache.juneau.html; 014 015import static javax.xml.stream.XMLStreamConstants.*; 016import static org.apache.juneau.html.HtmlTag.*; 017import static org.apache.juneau.internal.StringUtils.*; 018 019import java.lang.reflect.*; 020import java.util.*; 021 022import javax.xml.stream.*; 023 024import org.apache.juneau.*; 025import org.apache.juneau.html.annotation.*; 026import org.apache.juneau.parser.*; 027import org.apache.juneau.transform.*; 028import org.apache.juneau.xml.*; 029 030/** 031 * Session object that lives for the duration of a single use of {@link HtmlParser}. 032 * 033 * <p> 034 * This class is NOT thread safe. 035 * It is typically discarded after one-time use although it can be reused against multiple inputs. 036 */ 037@SuppressWarnings({ "unchecked", "rawtypes" }) 038public final class HtmlParserSession extends XmlParserSession { 039 040 private static final Set<String> whitespaceElements = new HashSet<>( 041 Arrays.asList( 042 new String[]{"br","bs","sp","ff"} 043 ) 044 ); 045 046 /** 047 * Create a new session using properties specified in the context. 048 * 049 * @param ctx 050 * The context creating this session object. 051 * The context contains all the configuration settings for this object. 052 * @param args 053 * Runtime session arguments. 054 */ 055 protected HtmlParserSession(HtmlParser ctx, ParserSessionArgs args) { 056 super(ctx, args); 057 } 058 059 @Override /* ParserSession */ 060 protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws Exception { 061 return parseAnything(type, getXmlReader(pipe), getOuter(), true, null); 062 } 063 064 @Override /* ReaderParserSession */ 065 protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType) 066 throws Exception { 067 return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType), 068 (ClassMeta<V>)getClassMeta(valueType), null); 069 } 070 071 @Override /* ReaderParserSession */ 072 protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType) 073 throws Exception { 074 return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null); 075 } 076 077 /* 078 * Reads anything starting at the current event. 079 * <p> 080 * Precondition: Must be pointing at outer START_ELEMENT. 081 * Postcondition: Pointing at outer END_ELEMENT. 082 */ 083 private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws Exception { 084 085 if (eType == null) 086 eType = (ClassMeta<T>)object(); 087 PojoSwap<T,Object> swap = (PojoSwap<T,Object>)eType.getPojoSwap(this); 088 BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this); 089 ClassMeta<?> sType = null; 090 if (builder != null) 091 sType = builder.getBuilderClassMeta(this); 092 else if (swap != null) 093 sType = swap.getSwapClassMeta(this); 094 else 095 sType = eType; 096 setCurrentClass(sType); 097 098 int event = r.getEventType(); 099 if (event != START_ELEMENT) 100 throw new ParseException(this, "parseAnything must be called on outer start element."); 101 102 if (! isRoot) 103 event = r.next(); 104 boolean isEmpty = (event == END_ELEMENT); 105 106 // Skip until we find a start element, end document, or non-empty text. 107 if (! isEmpty) 108 event = skipWs(r); 109 110 if (event == END_DOCUMENT) 111 throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType); 112 113 // Handle @Html(asXml=true) beans. 114 HtmlClassMeta hcm = sType.getExtendedMeta(HtmlClassMeta.class); 115 if (hcm.getFormat() == HtmlFormat.XML) 116 return super.parseAnything(eType, null, r, outer, false, pMeta); 117 118 Object o = null; 119 120 boolean isValid = true; 121 HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false)); 122 123 // If it's not a known tag, then parse it as XML. 124 // Allows us to parse stuff like "<div/>" into HTML5 beans. 125 if (tag == null && event != CHARACTERS) 126 return super.parseAnything(eType, null, r, outer, false, pMeta); 127 128 if (tag == HTML) 129 tag = skipToData(r); 130 131 if (isEmpty) { 132 o = ""; 133 } else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) { 134 String text = parseText(r); 135 if (sType.isObject() || sType.isCharSequence()) 136 o = text; 137 else if (sType.isChar()) 138 o = parseCharacter(text); 139 else if (sType.isBoolean()) 140 o = Boolean.parseBoolean(text); 141 else if (sType.isNumber()) 142 o = parseNumber(text, (Class<? extends Number>)eType.getInnerClass()); 143 else if (sType.canCreateNewInstanceFromString(outer)) 144 o = sType.newInstanceFromString(outer, text); 145 else if (sType.canCreateNewInstanceFromNumber(outer)) 146 o = sType.newInstanceFromNumber(this, outer, parseNumber(text, sType.getNewInstanceFromNumberClass())); 147 else 148 isValid = false; 149 150 } else if (tag == STRING || (tag == A && pMeta != null 151 && pMeta.getExtendedMeta(HtmlBeanPropertyMeta.class).getLink() != null)) { 152 String text = getElementText(r); 153 if (sType.isObject() || sType.isCharSequence()) 154 o = text; 155 else if (sType.isChar()) 156 o = parseCharacter(text); 157 else if (sType.canCreateNewInstanceFromString(outer)) 158 o = sType.newInstanceFromString(outer, text); 159 else if (sType.canCreateNewInstanceFromNumber(outer)) 160 o = sType.newInstanceFromNumber(this, outer, parseNumber(text, sType.getNewInstanceFromNumberClass())); 161 else 162 isValid = false; 163 skipTag(r, tag == STRING ? xSTRING : xA); 164 165 } else if (tag == NUMBER) { 166 String text = getElementText(r); 167 if (sType.isObject()) 168 o = parseNumber(text, Number.class); 169 else if (sType.isNumber()) 170 o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass()); 171 else if (sType.canCreateNewInstanceFromNumber(outer)) 172 o = sType.newInstanceFromNumber(this, outer, parseNumber(text, sType.getNewInstanceFromNumberClass())); 173 else 174 isValid = false; 175 skipTag(r, xNUMBER); 176 177 } else if (tag == BOOLEAN) { 178 String text = getElementText(r); 179 if (sType.isObject() || sType.isBoolean()) 180 o = Boolean.parseBoolean(text); 181 else 182 isValid = false; 183 skipTag(r, xBOOLEAN); 184 185 } else if (tag == P) { 186 String text = getElementText(r); 187 if (! "No Results".equals(text)) 188 isValid = false; 189 skipTag(r, xP); 190 191 } else if (tag == NULL) { 192 skipTag(r, NULL); 193 skipTag(r, xNULL); 194 195 } else if (tag == A) { 196 o = parseAnchor(r, eType); 197 skipTag(r, xA); 198 199 } else if (tag == TABLE) { 200 201 String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object"); 202 ClassMeta cm = getClassMeta(typeName, pMeta, eType); 203 204 if (cm != null) { 205 sType = eType = cm; 206 typeName = sType.isCollectionOrArray() ? "array" : "object"; 207 } else if (! "array".equals(typeName)) { 208 // Type name could be a subtype name. 209 typeName = sType.isCollectionOrArray() ? "array" : "object"; 210 } 211 212 if (typeName.equals("object")) { 213 if (sType.isObject()) { 214 o = parseIntoMap(r, (Map)new ObjectMap(this), sType.getKeyType(), sType.getValueType(), 215 pMeta); 216 } else if (sType.isMap()) { 217 o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer) 218 : new ObjectMap(this)), sType.getKeyType(), sType.getValueType(), pMeta); 219 } else if (builder != null) { 220 BeanMap m = toBeanMap(builder.create(this, eType)); 221 o = builder.build(this, parseIntoBean(r, m).getBean(), eType); 222 } else if (sType.canCreateNewBean(outer)) { 223 BeanMap m = newBeanMap(outer, sType.getInnerClass()); 224 o = parseIntoBean(r, m).getBean(); 225 } else { 226 isValid = false; 227 } 228 skipTag(r, xTABLE); 229 230 } else if (typeName.equals("array")) { 231 if (sType.isObject()) 232 o = parseTableIntoCollection(r, (Collection)new ObjectList(this), sType, pMeta); 233 else if (sType.isCollection()) 234 o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) 235 ? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta); 236 else if (sType.isArray() || sType.isArgs()) { 237 ArrayList l = (ArrayList)parseTableIntoCollection(r, new ArrayList(), sType, pMeta); 238 o = toArray(sType, l); 239 } 240 else 241 isValid = false; 242 skipTag(r, xTABLE); 243 244 } else { 245 isValid = false; 246 } 247 248 } else if (tag == UL) { 249 String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array"); 250 ClassMeta cm = getClassMeta(typeName, pMeta, eType); 251 if (cm != null) 252 sType = eType = cm; 253 254 if (sType.isObject()) 255 o = parseIntoCollection(r, new ObjectList(this), sType, pMeta); 256 else if (sType.isCollection() || sType.isObject()) 257 o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) 258 ? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta); 259 else if (sType.isArray() || sType.isArgs()) 260 o = toArray(sType, parseIntoCollection(r, new ArrayList(), sType, pMeta)); 261 else 262 isValid = false; 263 skipTag(r, xUL); 264 265 } 266 267 if (! isValid) 268 throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType); 269 270 if (swap != null && o != null) 271 o = swap.unswap(this, o, eType); 272 273 if (outer != null) 274 setParent(eType, o, outer); 275 276 skipWs(r); 277 return (T)o; 278 } 279 280 /* 281 * For parsing output from HtmlDocSerializer, this skips over the head, title, and links. 282 */ 283 private HtmlTag skipToData(XmlReader r) throws Exception { 284 while (true) { 285 int event = r.next(); 286 if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) { 287 r.nextTag(); 288 event = r.getEventType(); 289 boolean isEmpty = (event == END_ELEMENT); 290 // Skip until we find a start element, end document, or non-empty text. 291 if (! isEmpty) 292 event = skipWs(r); 293 if (event == END_DOCUMENT) 294 throw new ParseException(this, "Unexpected end of stream looking for data."); 295 return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false)); 296 } 297 } 298 } 299 300 private static String getAttribute(XmlReader r, String name, String def) { 301 for (int i = 0; i < r.getAttributeCount(); i++) 302 if (r.getAttributeLocalName(i).equals(name)) 303 return r.getAttributeValue(i); 304 return def; 305 } 306 307 /* 308 * Reads an anchor tag and converts it into a bean. 309 */ 310 private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType) 311 throws Exception { 312 String href = r.getAttributeValue(null, "href"); 313 String name = getElementText(r); 314 Class<T> beanClass = beanType.getInnerClass(); 315 if (beanClass.isAnnotationPresent(HtmlLink.class)) { 316 HtmlLink h = beanClass.getAnnotation(HtmlLink.class); 317 BeanMap<T> m = newBeanMap(beanClass); 318 m.put(h.uriProperty(), href); 319 m.put(h.nameProperty(), name); 320 return m.getBean(); 321 } 322 return convertToType(href, beanType); 323 } 324 325 private static Map<String,String> getAttributes(XmlReader r) { 326 Map<String,String> m = new TreeMap<>() ; 327 for (int i = 0; i < r.getAttributeCount(); i++) 328 m.put(r.getAttributeLocalName(i), r.getAttributeValue(i)); 329 return m; 330 } 331 332 /* 333 * Reads contents of <table> element. 334 * Precondition: Must be pointing at <table> event. 335 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 336 */ 337 private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType, 338 ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws Exception { 339 while (true) { 340 HtmlTag tag = nextTag(r, TR, xTABLE); 341 if (tag == xTABLE) 342 break; 343 tag = nextTag(r, TD, TH); 344 // Skip over the column headers. 345 if (tag == TH) { 346 skipTag(r); 347 r.nextTag(); 348 skipTag(r); 349 } else { 350 K key = parseAnything(keyType, r, m, false, pMeta); 351 nextTag(r, TD); 352 V value = parseAnything(valueType, r, m, false, pMeta); 353 setName(valueType, value, key); 354 m.put(key, value); 355 } 356 nextTag(r, xTR); 357 } 358 359 return m; 360 } 361 362 /* 363 * Reads contents of <ul> element. 364 * Precondition: Must be pointing at event following <ul> event. 365 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 366 */ 367 private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l, 368 ClassMeta<?> type, BeanPropertyMeta pMeta) throws Exception { 369 int argIndex = 0; 370 while (true) { 371 HtmlTag tag = nextTag(r, LI, xUL); 372 if (tag == xUL) 373 break; 374 ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType(); 375 l.add((E)parseAnything(elementType, r, l, false, pMeta)); 376 } 377 return l; 378 } 379 380 /* 381 * Reads contents of <ul> element. 382 * Precondition: Must be pointing at event following <ul> event. 383 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 384 */ 385 private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l, 386 ClassMeta<E> type, BeanPropertyMeta pMeta) throws Exception { 387 388 HtmlTag tag = nextTag(r, TR); 389 List<String> keys = new ArrayList<>(); 390 while (true) { 391 tag = nextTag(r, TH, xTR); 392 if (tag == xTR) 393 break; 394 keys.add(getElementText(r)); 395 } 396 397 int argIndex = 0; 398 399 while (true) { 400 r.nextTag(); 401 tag = HtmlTag.forEvent(this, r); 402 if (tag == xTABLE) 403 break; 404 405 ClassMeta elementType = null; 406 String beanType = getAttribute(r, getBeanTypePropertyName(type), null); 407 if (beanType != null) 408 elementType = getClassMeta(beanType, pMeta, null); 409 if (elementType == null) 410 elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType(); 411 if (elementType == null) 412 elementType = object(); 413 414 BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this); 415 416 if (builder != null || elementType.canCreateNewBean(l)) { 417 BeanMap m = 418 builder != null 419 ? toBeanMap(builder.create(this, elementType)) 420 : newBeanMap(l, elementType.getInnerClass()) 421 ; 422 for (int i = 0; i < keys.size(); i++) { 423 tag = nextTag(r, TD, NULL); 424 if (tag == NULL) { 425 m = null; 426 nextTag(r, xNULL); 427 break; 428 } 429 String key = keys.get(i); 430 BeanMapEntry e = m.getProperty(key); 431 if (e == null) { 432 //onUnknownProperty(key, m, -1, -1); 433 parseAnything(object(), r, l, false, null); 434 } else { 435 BeanPropertyMeta bpm = e.getMeta(); 436 ClassMeta<?> cm = bpm.getClassMeta(); 437 Object value = parseAnything(cm, r, m.getBean(false), false, bpm); 438 setName(cm, value, key); 439 bpm.set(m, key, value); 440 } 441 } 442 l.add( 443 m == null 444 ? null 445 : builder != null 446 ? builder.build(this, m.getBean(), elementType) 447 : (E)m.getBean() 448 ); 449 } else { 450 String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType())); 451 Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l) 452 : new ObjectMap(this)); 453 for (int i = 0; i < keys.size(); i++) { 454 tag = nextTag(r, TD, NULL); 455 if (tag == NULL) { 456 m = null; 457 nextTag(r, xNULL); 458 break; 459 } 460 String key = keys.get(i); 461 if (m != null) { 462 ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType(); 463 Object value = parseAnything(vt, r, l, false, pMeta); 464 setName(vt, value, key); 465 m.put(convertToType(key, kt), value); 466 } 467 } 468 if (m != null && c != null) { 469 ObjectMap m2 = (m instanceof ObjectMap ? (ObjectMap)m : new ObjectMap(m).setBeanSession(this)); 470 m2.put(getBeanTypePropertyName(type.getElementType()), c); 471 l.add((E)cast(m2, pMeta, elementType)); 472 } else { 473 l.add((E)m); 474 } 475 } 476 nextTag(r, xTR); 477 } 478 return l; 479 } 480 481 /* 482 * Reads contents of <table> element. 483 * Precondition: Must be pointing at event following <table> event. 484 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 485 */ 486 private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws Exception { 487 while (true) { 488 HtmlTag tag = nextTag(r, TR, xTABLE); 489 if (tag == xTABLE) 490 break; 491 tag = nextTag(r, TD, TH); 492 // Skip over the column headers. 493 if (tag == TH) { 494 skipTag(r); 495 r.nextTag(); 496 skipTag(r); 497 } else { 498 String key = getElementText(r); 499 nextTag(r, TD); 500 BeanPropertyMeta pMeta = m.getPropertyMeta(key); 501 if (pMeta == null) { 502 onUnknownProperty(key, m); 503 parseAnything(object(), r, null, false, null); 504 } else { 505 ClassMeta<?> cm = pMeta.getClassMeta(); 506 Object value = parseAnything(cm, r, m.getBean(false), false, pMeta); 507 setName(cm, value, key); 508 pMeta.set(m, key, value); 509 } 510 } 511 nextTag(r, xTR); 512 } 513 return m; 514 } 515 516 /* 517 * Reads the next tag. Advances past anything that's not a start or end tag. Throws an exception if 518 * it's not one of the expected tags. 519 * Precondition: Must be pointing before the event we want to parse. 520 * Postcondition: Pointing at the tag just parsed. 521 */ 522 private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws Exception { 523 int et = r.next(); 524 525 while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT) 526 et = r.next(); 527 528 if (et == END_DOCUMENT) 529 throw new ParseException(this, "Unexpected end of document."); 530 531 HtmlTag tag = HtmlTag.forEvent(this, r); 532 if (expected.length == 0) 533 return tag; 534 for (HtmlTag t : expected) 535 if (t == tag) 536 return tag; 537 538 throw new ParseException(this, "Unexpected tag: ''{0}''. Expected one of the following: {1}", tag, expected); 539 } 540 541 /* 542 * Skips over the current element and advances to the next element. 543 * <p> 544 * Precondition: Pointing to opening tag. 545 * Postcondition: Pointing to next opening tag. 546 * 547 * @param r The stream being read from. 548 * @throws XMLStreamException 549 */ 550 private void skipTag(XmlReader r) throws Exception { 551 int et = r.getEventType(); 552 553 if (et != START_ELEMENT) 554 throw new ParseException(this, 555 "skipToNextTag() call on invalid event ''{0}''. Must only be called on START_ELEMENT events.", 556 XmlUtils.toReadableEvent(r) 557 ); 558 559 String n = r.getLocalName(); 560 561 int depth = 0; 562 while (true) { 563 et = r.next(); 564 if (et == START_ELEMENT) { 565 String n2 = r.getLocalName(); 566 if (n.equals(n2)) 567 depth++; 568 } else if (et == END_ELEMENT) { 569 String n2 = r.getLocalName(); 570 if (n.equals(n2)) 571 depth--; 572 if (depth < 0) 573 return; 574 } 575 } 576 } 577 578 private void skipTag(XmlReader r, HtmlTag...expected) throws Exception { 579 HtmlTag tag = HtmlTag.forEvent(this, r); 580 if (tag.isOneOf(expected)) 581 r.next(); 582 else 583 throw new ParseException(this, 584 "Unexpected tag: ''{0}''. Expected one of the following: {1}", 585 tag, expected); 586 } 587 588 private static int skipWs(XmlReader r) throws XMLStreamException { 589 int event = r.getEventType(); 590 while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace()) 591 event = r.next(); 592 return event; 593 } 594 595 /** 596 * Parses CHARACTERS data. 597 * 598 * <p> 599 * Precondition: Pointing to event immediately following opening tag. 600 * Postcondition: Pointing to closing tag. 601 * 602 * @param r The stream being read from. 603 * @return The parsed string. 604 * @throws XMLStreamException 605 */ 606 @Override /* XmlParserSession */ 607 protected final String parseText(XmlReader r) throws Exception { 608 609 StringBuilder sb = getStringBuilder(); 610 611 int et = r.getEventType(); 612 if (et == END_ELEMENT) 613 return ""; 614 615 int depth = 0; 616 617 String characters = null; 618 619 while (true) { 620 if (et == START_ELEMENT) { 621 if (characters != null) { 622 if (sb.length() == 0) 623 characters = trimStart(characters); 624 sb.append(characters); 625 characters = null; 626 } 627 HtmlTag tag = HtmlTag.forEvent(this, r); 628 if (tag == BR) { 629 sb.append('\n'); 630 r.nextTag(); 631 } else if (tag == BS) { 632 sb.append('\b'); 633 r.nextTag(); 634 } else if (tag == SP) { 635 et = r.next(); 636 if (et == CHARACTERS) { 637 String s = r.getText(); 638 if (s.length() > 0) { 639 char c = r.getText().charAt(0); 640 if (c == '\u2003') 641 c = '\t'; 642 sb.append(c); 643 } 644 r.nextTag(); 645 } 646 } else if (tag == FF) { 647 sb.append('\f'); 648 r.nextTag(); 649 } else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) { 650 et = r.next(); 651 if (et == CHARACTERS) { 652 sb.append(r.getText()); 653 r.nextTag(); 654 } 655 } else { 656 sb.append('<').append(r.getLocalName()); 657 for (int i = 0; i < r.getAttributeCount(); i++) 658 sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\''); 659 sb.append('>'); 660 depth++; 661 } 662 } else if (et == END_ELEMENT) { 663 if (characters != null) { 664 if (sb.length() == 0) 665 characters = trimStart(characters); 666 if (depth == 0) 667 characters = trimEnd(characters); 668 sb.append(characters); 669 characters = null; 670 } 671 if (depth == 0) 672 break; 673 sb.append('<').append(r.getLocalName()).append('>'); 674 depth--; 675 } else if (et == CHARACTERS) { 676 characters = r.getText(); 677 } 678 et = r.next(); 679 } 680 681 String s = trim(sb.toString()); 682 returnStringBuilder(sb); 683 return s; 684 } 685 686 /** 687 * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag. 688 * 689 * <p> 690 * Precondition: Pointing to opening tag. 691 * Postcondition: Pointing to closing tag. 692 * 693 * @param r The stream being read from. 694 * @return The parsed string. 695 * @throws XMLStreamException 696 */ 697 @Override /* XmlParserSession */ 698 protected final String getElementText(XmlReader r) throws Exception { 699 r.next(); 700 return parseText(r); 701 } 702 703 @Override /* XmlParserSession */ 704 protected final boolean isWhitespaceElement(XmlReader r) { 705 String s = r.getLocalName(); 706 return whitespaceElements.contains(s); 707 } 708 709 @Override /* XmlParserSession */ 710 protected final String parseWhitespaceElement(XmlReader r) throws Exception { 711 712 HtmlTag tag = HtmlTag.forEvent(this, r); 713 int et = r.next(); 714 if (tag == BR) { 715 return "\n"; 716 } else if (tag == BS) { 717 return "\b"; 718 } else if (tag == FF) { 719 return "\f"; 720 } else if (tag == SP) { 721 if (et == CHARACTERS) { 722 String s = r.getText(); 723 if (s.charAt(0) == '\u2003') 724 s = "\t"; 725 r.next(); 726 return decodeString(s); 727 } 728 return ""; 729 } else { 730 throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag); 731 } 732 } 733}