001// ***************************************************************************************************************************
002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
003// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
005// * with the License.  You may obtain a copy of the License at                                                              *
006// *                                                                                                                         *
007// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
008// *                                                                                                                         *
009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
011// * specific language governing permissions and limitations under the License.                                              *
012// ***************************************************************************************************************************
013package org.apache.juneau.html;
014
015import static javax.xml.stream.XMLStreamConstants.*;
016import static org.apache.juneau.html.HtmlTag.*;
017import static org.apache.juneau.internal.StringUtils.*;
018
019import java.lang.reflect.*;
020import java.util.*;
021
022import javax.xml.stream.*;
023
024import org.apache.juneau.*;
025import org.apache.juneau.html.annotation.*;
026import org.apache.juneau.parser.*;
027import org.apache.juneau.transform.*;
028import org.apache.juneau.xml.*;
029
030/**
031 * Session object that lives for the duration of a single use of {@link HtmlParser}.
032 *
033 * <p>
034 * This class is NOT thread safe.
035 * It is typically discarded after one-time use although it can be reused against multiple inputs.
036 */
037@SuppressWarnings({ "unchecked", "rawtypes" })
038public final class HtmlParserSession extends XmlParserSession {
039
040   private static final Set<String> whitespaceElements = new HashSet<>(
041      Arrays.asList(
042         new String[]{"br","bs","sp","ff"}
043      )
044   );
045
046   /**
047    * Create a new session using properties specified in the context.
048    *
049    * @param ctx
050    *    The context creating this session object.
051    *    The context contains all the configuration settings for this object.
052    * @param args
053    *    Runtime session arguments.
054    */
055   protected HtmlParserSession(HtmlParser ctx, ParserSessionArgs args) {
056      super(ctx, args);
057   }
058
059   @Override /* ParserSession */
060   protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws Exception {
061      return parseAnything(type, getXmlReader(pipe), getOuter(), true, null);
062   }
063
064   @Override /* ReaderParserSession */
065   protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType)
066         throws Exception {
067      return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType),
068         (ClassMeta<V>)getClassMeta(valueType), null);
069   }
070
071   @Override /* ReaderParserSession */
072   protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType)
073         throws Exception {
074      return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null);
075   }
076
077   /*
078    * Reads anything starting at the current event.
079    * <p>
080    * Precondition:  Must be pointing at outer START_ELEMENT.
081    * Postcondition:  Pointing at outer END_ELEMENT.
082    */
083   private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws Exception {
084
085      if (eType == null)
086         eType = (ClassMeta<T>)object();
087      PojoSwap<T,Object> swap = (PojoSwap<T,Object>)eType.getPojoSwap(this);
088      BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this);
089      ClassMeta<?> sType = null;
090      if (builder != null)
091         sType = builder.getBuilderClassMeta(this);
092      else if (swap != null)
093         sType = swap.getSwapClassMeta(this);
094      else
095         sType = eType;
096      setCurrentClass(sType);
097
098      int event = r.getEventType();
099      if (event != START_ELEMENT)
100         throw new ParseException(this, "parseAnything must be called on outer start element.");
101
102      if (! isRoot)
103         event = r.next();
104      boolean isEmpty = (event == END_ELEMENT);
105
106      // Skip until we find a start element, end document, or non-empty text.
107      if (! isEmpty)
108         event = skipWs(r);
109
110      if (event == END_DOCUMENT)
111         throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType);
112
113      // Handle @Html(asXml=true) beans.
114      HtmlClassMeta hcm = sType.getExtendedMeta(HtmlClassMeta.class);
115      if (hcm.getFormat() == HtmlFormat.XML)
116         return super.parseAnything(eType, null, r, outer, false, pMeta);
117
118      Object o = null;
119
120      boolean isValid = true;
121      HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
122
123      // If it's not a known tag, then parse it as XML.
124      // Allows us to parse stuff like "<div/>" into HTML5 beans.
125      if (tag == null && event != CHARACTERS)
126         return super.parseAnything(eType, null, r, outer, false, pMeta);
127
128      if (tag == HTML)
129         tag = skipToData(r);
130
131      if (isEmpty) {
132         o = "";
133      } else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) {
134         String text = parseText(r);
135         if (sType.isObject() || sType.isCharSequence())
136            o = text;
137         else if (sType.isChar())
138            o = parseCharacter(text);
139         else if (sType.isBoolean())
140            o = Boolean.parseBoolean(text);
141         else if (sType.isNumber())
142            o = parseNumber(text, (Class<? extends Number>)eType.getInnerClass());
143         else if (sType.canCreateNewInstanceFromString(outer))
144            o = sType.newInstanceFromString(outer, text);
145         else if (sType.canCreateNewInstanceFromNumber(outer))
146            o = sType.newInstanceFromNumber(this, outer, parseNumber(text, sType.getNewInstanceFromNumberClass()));
147         else
148            isValid = false;
149
150      } else if (tag == STRING || (tag == A && pMeta != null
151            && pMeta.getExtendedMeta(HtmlBeanPropertyMeta.class).getLink() != null)) {
152         String text = getElementText(r);
153         if (sType.isObject() || sType.isCharSequence())
154            o = text;
155         else if (sType.isChar())
156            o = parseCharacter(text);
157         else if (sType.canCreateNewInstanceFromString(outer))
158            o = sType.newInstanceFromString(outer, text);
159         else if (sType.canCreateNewInstanceFromNumber(outer))
160            o = sType.newInstanceFromNumber(this, outer, parseNumber(text, sType.getNewInstanceFromNumberClass()));
161         else
162            isValid = false;
163         skipTag(r, tag == STRING ? xSTRING : xA);
164
165      } else if (tag == NUMBER) {
166         String text = getElementText(r);
167         if (sType.isObject())
168            o = parseNumber(text, Number.class);
169         else if (sType.isNumber())
170            o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass());
171         else if (sType.canCreateNewInstanceFromNumber(outer))
172            o = sType.newInstanceFromNumber(this, outer, parseNumber(text, sType.getNewInstanceFromNumberClass()));
173         else
174            isValid = false;
175         skipTag(r, xNUMBER);
176
177      } else if (tag == BOOLEAN) {
178         String text = getElementText(r);
179         if (sType.isObject() || sType.isBoolean())
180            o = Boolean.parseBoolean(text);
181         else
182            isValid = false;
183         skipTag(r, xBOOLEAN);
184
185      } else if (tag == P) {
186         String text = getElementText(r);
187         if (! "No Results".equals(text))
188            isValid = false;
189         skipTag(r, xP);
190
191      } else if (tag == NULL) {
192         skipTag(r, NULL);
193         skipTag(r, xNULL);
194
195      } else if (tag == A) {
196         o = parseAnchor(r, eType);
197         skipTag(r, xA);
198
199      } else if (tag == TABLE) {
200
201         String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object");
202         ClassMeta cm = getClassMeta(typeName, pMeta, eType);
203
204         if (cm != null) {
205            sType = eType = cm;
206            typeName = sType.isCollectionOrArray() ? "array" : "object";
207         } else if (! "array".equals(typeName)) {
208            // Type name could be a subtype name.
209            typeName = sType.isCollectionOrArray() ? "array" : "object";
210         }
211
212         if (typeName.equals("object")) {
213            if (sType.isObject()) {
214               o = parseIntoMap(r, (Map)new ObjectMap(this), sType.getKeyType(), sType.getValueType(),
215                  pMeta);
216            } else if (sType.isMap()) {
217               o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer)
218                  : new ObjectMap(this)), sType.getKeyType(), sType.getValueType(), pMeta);
219            } else if (builder != null) {
220               BeanMap m = toBeanMap(builder.create(this, eType));
221               o = builder.build(this, parseIntoBean(r, m).getBean(), eType);
222            } else if (sType.canCreateNewBean(outer)) {
223               BeanMap m = newBeanMap(outer, sType.getInnerClass());
224               o = parseIntoBean(r, m).getBean();
225            } else {
226               isValid = false;
227            }
228            skipTag(r, xTABLE);
229
230         } else if (typeName.equals("array")) {
231            if (sType.isObject())
232               o = parseTableIntoCollection(r, (Collection)new ObjectList(this), sType, pMeta);
233            else if (sType.isCollection())
234               o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
235                  ? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta);
236            else if (sType.isArray() || sType.isArgs()) {
237               ArrayList l = (ArrayList)parseTableIntoCollection(r, new ArrayList(), sType, pMeta);
238               o = toArray(sType, l);
239            }
240            else
241               isValid = false;
242            skipTag(r, xTABLE);
243
244         } else {
245            isValid = false;
246         }
247
248      } else if (tag == UL) {
249         String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array");
250         ClassMeta cm = getClassMeta(typeName, pMeta, eType);
251         if (cm != null)
252            sType = eType = cm;
253
254         if (sType.isObject())
255            o = parseIntoCollection(r, new ObjectList(this), sType, pMeta);
256         else if (sType.isCollection() || sType.isObject())
257            o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
258               ? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta);
259         else if (sType.isArray() || sType.isArgs())
260            o = toArray(sType, parseIntoCollection(r, new ArrayList(), sType, pMeta));
261         else
262            isValid = false;
263         skipTag(r, xUL);
264
265      }
266
267      if (! isValid)
268         throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType);
269
270      if (swap != null && o != null)
271         o = swap.unswap(this, o, eType);
272
273      if (outer != null)
274         setParent(eType, o, outer);
275
276      skipWs(r);
277      return (T)o;
278   }
279
280   /*
281    * For parsing output from HtmlDocSerializer, this skips over the head, title, and links.
282    */
283   private HtmlTag skipToData(XmlReader r) throws Exception {
284      while (true) {
285         int event = r.next();
286         if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) {
287            r.nextTag();
288            event = r.getEventType();
289            boolean isEmpty = (event == END_ELEMENT);
290            // Skip until we find a start element, end document, or non-empty text.
291            if (! isEmpty)
292               event = skipWs(r);
293            if (event == END_DOCUMENT)
294               throw new ParseException(this, "Unexpected end of stream looking for data.");
295            return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
296         }
297      }
298   }
299
300   private static String getAttribute(XmlReader r, String name, String def) {
301      for (int i = 0; i < r.getAttributeCount(); i++)
302         if (r.getAttributeLocalName(i).equals(name))
303            return r.getAttributeValue(i);
304      return def;
305   }
306
307   /*
308    * Reads an anchor tag and converts it into a bean.
309    */
310   private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType)
311         throws Exception {
312      String href = r.getAttributeValue(null, "href");
313      String name = getElementText(r);
314      Class<T> beanClass = beanType.getInnerClass();
315      if (beanClass.isAnnotationPresent(HtmlLink.class)) {
316         HtmlLink h = beanClass.getAnnotation(HtmlLink.class);
317         BeanMap<T> m = newBeanMap(beanClass);
318         m.put(h.uriProperty(), href);
319         m.put(h.nameProperty(), name);
320         return m.getBean();
321      }
322      return convertToType(href, beanType);
323   }
324
325   private static Map<String,String> getAttributes(XmlReader r) {
326      Map<String,String> m = new TreeMap<>() ;
327      for (int i = 0; i < r.getAttributeCount(); i++)
328         m.put(r.getAttributeLocalName(i), r.getAttributeValue(i));
329      return m;
330   }
331
332   /*
333    * Reads contents of <table> element.
334    * Precondition:  Must be pointing at <table> event.
335    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
336    */
337   private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType,
338         ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws Exception {
339      while (true) {
340         HtmlTag tag = nextTag(r, TR, xTABLE);
341         if (tag == xTABLE)
342            break;
343         tag = nextTag(r, TD, TH);
344         // Skip over the column headers.
345         if (tag == TH) {
346            skipTag(r);
347            r.nextTag();
348            skipTag(r);
349         } else {
350            K key = parseAnything(keyType, r, m, false, pMeta);
351            nextTag(r, TD);
352            V value = parseAnything(valueType, r, m, false, pMeta);
353            setName(valueType, value, key);
354            m.put(key, value);
355         }
356         nextTag(r, xTR);
357      }
358
359      return m;
360   }
361
362   /*
363    * Reads contents of <ul> element.
364    * Precondition:  Must be pointing at event following <ul> event.
365    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
366    */
367   private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l,
368         ClassMeta<?> type, BeanPropertyMeta pMeta) throws Exception {
369      int argIndex = 0;
370      while (true) {
371         HtmlTag tag = nextTag(r, LI, xUL);
372         if (tag == xUL)
373            break;
374         ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
375         l.add((E)parseAnything(elementType, r, l, false, pMeta));
376      }
377      return l;
378   }
379
380   /*
381    * Reads contents of <ul> element.
382    * Precondition:  Must be pointing at event following <ul> event.
383    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
384    */
385   private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l,
386         ClassMeta<E> type, BeanPropertyMeta pMeta) throws Exception {
387
388      HtmlTag tag = nextTag(r, TR);
389      List<String> keys = new ArrayList<>();
390      while (true) {
391         tag = nextTag(r, TH, xTR);
392         if (tag == xTR)
393            break;
394         keys.add(getElementText(r));
395      }
396
397      int argIndex = 0;
398
399      while (true) {
400         r.nextTag();
401         tag = HtmlTag.forEvent(this, r);
402         if (tag == xTABLE)
403            break;
404
405         ClassMeta elementType = null;
406         String beanType = getAttribute(r, getBeanTypePropertyName(type), null);
407         if (beanType != null)
408            elementType = getClassMeta(beanType, pMeta, null);
409         if (elementType == null)
410            elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
411         if (elementType == null)
412            elementType = object();
413
414         BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this);
415
416         if (builder != null || elementType.canCreateNewBean(l)) {
417            BeanMap m =
418               builder != null
419               ? toBeanMap(builder.create(this, elementType))
420               : newBeanMap(l, elementType.getInnerClass())
421            ;
422            for (int i = 0; i < keys.size(); i++) {
423               tag = nextTag(r, TD, NULL);
424               if (tag == NULL) {
425                  m = null;
426                  nextTag(r, xNULL);
427                  break;
428               }
429               String key = keys.get(i);
430               BeanMapEntry e = m.getProperty(key);
431               if (e == null) {
432                  //onUnknownProperty(key, m, -1, -1);
433                  parseAnything(object(), r, l, false, null);
434               } else {
435                  BeanPropertyMeta bpm = e.getMeta();
436                  ClassMeta<?> cm = bpm.getClassMeta();
437                  Object value = parseAnything(cm, r, m.getBean(false), false, bpm);
438                  setName(cm, value, key);
439                  bpm.set(m, key, value);
440               }
441            }
442            l.add(
443               m == null
444               ? null
445               : builder != null
446                  ? builder.build(this, m.getBean(), elementType)
447                  : (E)m.getBean()
448            );
449         } else {
450            String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType()));
451            Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l)
452               : new ObjectMap(this));
453            for (int i = 0; i < keys.size(); i++) {
454               tag = nextTag(r, TD, NULL);
455               if (tag == NULL) {
456                  m = null;
457                  nextTag(r, xNULL);
458                  break;
459               }
460               String key = keys.get(i);
461               if (m != null) {
462                  ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType();
463                  Object value = parseAnything(vt, r, l, false, pMeta);
464                  setName(vt, value, key);
465                  m.put(convertToType(key, kt), value);
466               }
467            }
468            if (m != null && c != null) {
469               ObjectMap m2 = (m instanceof ObjectMap ? (ObjectMap)m : new ObjectMap(m).setBeanSession(this));
470               m2.put(getBeanTypePropertyName(type.getElementType()), c);
471               l.add((E)cast(m2, pMeta, elementType));
472            } else {
473               l.add((E)m);
474            }
475         }
476         nextTag(r, xTR);
477      }
478      return l;
479   }
480
481   /*
482    * Reads contents of <table> element.
483    * Precondition:  Must be pointing at event following <table> event.
484    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
485    */
486   private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws Exception {
487      while (true) {
488         HtmlTag tag = nextTag(r, TR, xTABLE);
489         if (tag == xTABLE)
490            break;
491         tag = nextTag(r, TD, TH);
492         // Skip over the column headers.
493         if (tag == TH) {
494            skipTag(r);
495            r.nextTag();
496            skipTag(r);
497         } else {
498            String key = getElementText(r);
499            nextTag(r, TD);
500            BeanPropertyMeta pMeta = m.getPropertyMeta(key);
501            if (pMeta == null) {
502               onUnknownProperty(key, m);
503               parseAnything(object(), r, null, false, null);
504            } else {
505               ClassMeta<?> cm = pMeta.getClassMeta();
506               Object value = parseAnything(cm, r, m.getBean(false), false, pMeta);
507               setName(cm, value, key);
508               pMeta.set(m, key, value);
509            }
510         }
511         nextTag(r, xTR);
512      }
513      return m;
514   }
515
516   /*
517    * Reads the next tag.  Advances past anything that's not a start or end tag.  Throws an exception if
518    *    it's not one of the expected tags.
519    * Precondition:  Must be pointing before the event we want to parse.
520    * Postcondition:  Pointing at the tag just parsed.
521    */
522   private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws Exception {
523      int et = r.next();
524
525      while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT)
526         et = r.next();
527
528      if (et == END_DOCUMENT)
529         throw new ParseException(this, "Unexpected end of document.");
530
531      HtmlTag tag = HtmlTag.forEvent(this, r);
532      if (expected.length == 0)
533         return tag;
534      for (HtmlTag t : expected)
535         if (t == tag)
536            return tag;
537
538      throw new ParseException(this, "Unexpected tag: ''{0}''.  Expected one of the following: {1}", tag, expected);
539   }
540
541   /*
542    * Skips over the current element and advances to the next element.
543    * <p>
544    * Precondition:  Pointing to opening tag.
545    * Postcondition:  Pointing to next opening tag.
546    *
547    * @param r The stream being read from.
548    * @throws XMLStreamException
549    */
550   private void skipTag(XmlReader r) throws Exception {
551      int et = r.getEventType();
552
553      if (et != START_ELEMENT)
554         throw new ParseException(this,
555            "skipToNextTag() call on invalid event ''{0}''.  Must only be called on START_ELEMENT events.",
556            XmlUtils.toReadableEvent(r)
557         );
558
559      String n = r.getLocalName();
560
561      int depth = 0;
562      while (true) {
563         et = r.next();
564         if (et == START_ELEMENT) {
565            String n2 = r.getLocalName();
566               if (n.equals(n2))
567            depth++;
568         } else if (et == END_ELEMENT) {
569            String n2 = r.getLocalName();
570            if (n.equals(n2))
571               depth--;
572            if (depth < 0)
573               return;
574         }
575      }
576   }
577
578   private void skipTag(XmlReader r, HtmlTag...expected) throws Exception {
579      HtmlTag tag = HtmlTag.forEvent(this, r);
580      if (tag.isOneOf(expected))
581         r.next();
582      else
583         throw new ParseException(this,
584            "Unexpected tag: ''{0}''.  Expected one of the following: {1}",
585            tag, expected);
586   }
587
588   private static int skipWs(XmlReader r)  throws XMLStreamException {
589      int event = r.getEventType();
590      while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace())
591         event = r.next();
592      return event;
593   }
594
595   /**
596    * Parses CHARACTERS data.
597    *
598    * <p>
599    * Precondition:  Pointing to event immediately following opening tag.
600    * Postcondition:  Pointing to closing tag.
601    *
602    * @param r The stream being read from.
603    * @return The parsed string.
604    * @throws XMLStreamException
605    */
606   @Override /* XmlParserSession */
607   protected final String parseText(XmlReader r) throws Exception {
608
609      StringBuilder sb = getStringBuilder();
610
611      int et = r.getEventType();
612      if (et == END_ELEMENT)
613         return "";
614
615      int depth = 0;
616
617      String characters = null;
618
619      while (true) {
620         if (et == START_ELEMENT) {
621            if (characters != null) {
622               if (sb.length() == 0)
623                  characters = trimStart(characters);
624               sb.append(characters);
625               characters = null;
626            }
627            HtmlTag tag = HtmlTag.forEvent(this, r);
628            if (tag == BR) {
629               sb.append('\n');
630               r.nextTag();
631            } else if (tag == BS) {
632               sb.append('\b');
633               r.nextTag();
634            } else if (tag == SP) {
635               et = r.next();
636               if (et == CHARACTERS) {
637                  String s = r.getText();
638                  if (s.length() > 0) {
639                     char c = r.getText().charAt(0);
640                     if (c == '\u2003')
641                        c = '\t';
642                     sb.append(c);
643                  }
644                  r.nextTag();
645               }
646            } else if (tag == FF) {
647               sb.append('\f');
648               r.nextTag();
649            } else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) {
650               et = r.next();
651               if (et == CHARACTERS) {
652                  sb.append(r.getText());
653                  r.nextTag();
654               }
655            } else {
656               sb.append('<').append(r.getLocalName());
657               for (int i = 0; i < r.getAttributeCount(); i++)
658                  sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\'');
659               sb.append('>');
660               depth++;
661            }
662         } else if (et == END_ELEMENT) {
663            if (characters != null) {
664               if (sb.length() == 0)
665                  characters = trimStart(characters);
666               if (depth == 0)
667                  characters = trimEnd(characters);
668               sb.append(characters);
669               characters = null;
670            }
671            if (depth == 0)
672               break;
673            sb.append('<').append(r.getLocalName()).append('>');
674            depth--;
675         } else if (et == CHARACTERS) {
676            characters = r.getText();
677         }
678         et = r.next();
679      }
680
681      String s = trim(sb.toString());
682      returnStringBuilder(sb);
683      return s;
684   }
685
686   /**
687    * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag.
688    *
689    * <p>
690    * Precondition:  Pointing to opening tag.
691    * Postcondition:  Pointing to closing tag.
692    *
693    * @param r The stream being read from.
694    * @return The parsed string.
695    * @throws XMLStreamException
696    */
697   @Override /* XmlParserSession */
698   protected final String getElementText(XmlReader r) throws Exception {
699      r.next();
700      return parseText(r);
701   }
702
703   @Override /* XmlParserSession */
704   protected final boolean isWhitespaceElement(XmlReader r) {
705      String s = r.getLocalName();
706      return whitespaceElements.contains(s);
707   }
708
709   @Override /* XmlParserSession */
710   protected final String parseWhitespaceElement(XmlReader r) throws Exception {
711
712      HtmlTag tag = HtmlTag.forEvent(this, r);
713      int et = r.next();
714      if (tag == BR) {
715         return "\n";
716      } else if (tag == BS) {
717         return "\b";
718      } else if (tag == FF) {
719         return "\f";
720      } else if (tag == SP) {
721         if (et == CHARACTERS) {
722            String s = r.getText();
723            if (s.charAt(0) == '\u2003')
724               s = "\t";
725            r.next();
726            return decodeString(s);
727         }
728         return "";
729      } else {
730         throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag);
731      }
732   }
733}