001// ***************************************************************************************************************************
002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
003// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
005// * with the License.  You may obtain a copy of the License at                                                              *
006// *                                                                                                                         *
007// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
008// *                                                                                                                         *
009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
011// * specific language governing permissions and limitations under the License.                                              *
012// ***************************************************************************************************************************
013package org.apache.juneau.html;
014
015import static javax.xml.stream.XMLStreamConstants.*;
016import static org.apache.juneau.html.HtmlTag.*;
017import static org.apache.juneau.internal.StringUtils.*;
018
019import java.io.IOException;
020import java.lang.reflect.*;
021import java.util.*;
022
023import javax.xml.stream.*;
024
025import org.apache.juneau.*;
026import org.apache.juneau.html.annotation.*;
027import org.apache.juneau.parser.*;
028import org.apache.juneau.transform.*;
029import org.apache.juneau.xml.*;
030
031/**
032 * Session object that lives for the duration of a single use of {@link HtmlParser}.
033 *
034 * <p>
035 * This class is NOT thread safe.
036 * It is typically discarded after one-time use although it can be reused against multiple inputs.
037 */
038@SuppressWarnings({ "unchecked", "rawtypes" })
039public final class HtmlParserSession extends XmlParserSession {
040
041   private static final Set<String> whitespaceElements = new HashSet<>(
042      Arrays.asList(
043         new String[]{"br","bs","sp","ff"}
044      )
045   );
046
   /**
    * Create a new session using properties specified in the context.
    *
    * <p>
    * Both arguments are passed straight through to the {@link XmlParserSession} constructor.
    *
    * @param ctx
    *    The context creating this session object.
    *    The context contains all the configuration settings for this object.
    * @param args
    *    Runtime session arguments.
    */
   protected HtmlParserSession(HtmlParser ctx, ParserSessionArgs args) {
      super(ctx, args);
   }
059
060   @Override /* ParserSession */
061   protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException {
062      try {
063         return parseAnything(type, getXmlReader(pipe), getOuter(), true, null);
064      } catch (XMLStreamException e) {
065         throw new ParseException(e);
066      }
067   }
068
069   @Override /* ReaderParserSession */
070   protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType)
071         throws Exception {
072      return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType),
073         (ClassMeta<V>)getClassMeta(valueType), null);
074   }
075
076   @Override /* ReaderParserSession */
077   protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType)
078         throws Exception {
079      return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null);
080   }
081
082   /*
083    * Reads anything starting at the current event.
084    * <p>
085    * Precondition:  Must be pointing at outer START_ELEMENT.
086    * Postcondition:  Pointing at outer END_ELEMENT.
087    */
088   private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
089
090      if (eType == null)
091         eType = (ClassMeta<T>)object();
092      PojoSwap<T,Object> swap = (PojoSwap<T,Object>)eType.getPojoSwap(this);
093      BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this);
094      ClassMeta<?> sType = null;
095      if (builder != null)
096         sType = builder.getBuilderClassMeta(this);
097      else if (swap != null)
098         sType = swap.getSwapClassMeta(this);
099      else
100         sType = eType;
101
102      if (sType.isOptional()) 
103         return (T)Optional.ofNullable(parseAnything(eType.getElementType(), r, outer, isRoot, pMeta));
104
105      setCurrentClass(sType);
106
107      int event = r.getEventType();
108      if (event != START_ELEMENT)
109         throw new ParseException(this, "parseAnything must be called on outer start element.");
110
111      if (! isRoot)
112         event = r.next();
113      boolean isEmpty = (event == END_ELEMENT);
114
115      // Skip until we find a start element, end document, or non-empty text.
116      if (! isEmpty)
117         event = skipWs(r);
118
119      if (event == END_DOCUMENT)
120         throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType);
121
122      // Handle @Html(asXml=true) beans.
123      HtmlClassMeta hcm = sType.getExtendedMeta(HtmlClassMeta.class);
124      if (hcm.getFormat() == HtmlFormat.XML)
125         return super.parseAnything(eType, null, r, outer, false, pMeta);
126
127      Object o = null;
128
129      boolean isValid = true;
130      HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
131
132      // If it's not a known tag, then parse it as XML.
133      // Allows us to parse stuff like "<div/>" into HTML5 beans.
134      if (tag == null && event != CHARACTERS)
135         return super.parseAnything(eType, null, r, outer, false, pMeta);
136
137      if (tag == HTML)
138         tag = skipToData(r);
139
140      if (isEmpty) {
141         o = "";
142      } else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) {
143         String text = parseText(r);
144         if (sType.isObject() || sType.isCharSequence())
145            o = text;
146         else if (sType.isChar())
147            o = parseCharacter(text);
148         else if (sType.isBoolean())
149            o = Boolean.parseBoolean(text);
150         else if (sType.isNumber())
151            o = parseNumber(text, (Class<? extends Number>)eType.getInnerClass());
152         else if (sType.canCreateNewInstanceFromString(outer))
153            o = sType.newInstanceFromString(outer, text);
154         else
155            isValid = false;
156
157      } else if (tag == STRING || (tag == A && pMeta != null
158            && pMeta.getExtendedMeta(HtmlBeanPropertyMeta.class).getLink() != null)) {
159         String text = getElementText(r);
160         if (sType.isObject() || sType.isCharSequence())
161            o = text;
162         else if (sType.isChar())
163            o = parseCharacter(text);
164         else if (sType.canCreateNewInstanceFromString(outer))
165            o = sType.newInstanceFromString(outer, text);
166         else
167            isValid = false;
168         skipTag(r, tag == STRING ? xSTRING : xA);
169
170      } else if (tag == NUMBER) {
171         String text = getElementText(r);
172         if (sType.isObject())
173            o = parseNumber(text, Number.class);
174         else if (sType.isNumber())
175            o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass());
176         else
177            isValid = false;
178         skipTag(r, xNUMBER);
179
180      } else if (tag == BOOLEAN) {
181         String text = getElementText(r);
182         if (sType.isObject() || sType.isBoolean())
183            o = Boolean.parseBoolean(text);
184         else
185            isValid = false;
186         skipTag(r, xBOOLEAN);
187
188      } else if (tag == P) {
189         String text = getElementText(r);
190         if (! "No Results".equals(text))
191            isValid = false;
192         skipTag(r, xP);
193
194      } else if (tag == NULL) {
195         skipTag(r, NULL);
196         skipTag(r, xNULL);
197
198      } else if (tag == A) {
199         o = parseAnchor(r, eType);
200         skipTag(r, xA);
201
202      } else if (tag == TABLE) {
203
204         String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object");
205         ClassMeta cm = getClassMeta(typeName, pMeta, eType);
206
207         if (cm != null) {
208            sType = eType = cm;
209            typeName = sType.isCollectionOrArray() ? "array" : "object";
210         } else if (! "array".equals(typeName)) {
211            // Type name could be a subtype name.
212            typeName = sType.isCollectionOrArray() ? "array" : "object";
213         }
214
215         if (typeName.equals("object")) {
216            if (sType.isObject()) {
217               o = parseIntoMap(r, (Map)new ObjectMap(this), sType.getKeyType(), sType.getValueType(),
218                  pMeta);
219            } else if (sType.isMap()) {
220               o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer)
221                  : new ObjectMap(this)), sType.getKeyType(), sType.getValueType(), pMeta);
222            } else if (builder != null) {
223               BeanMap m = toBeanMap(builder.create(this, eType));
224               o = builder.build(this, parseIntoBean(r, m).getBean(), eType);
225            } else if (sType.canCreateNewBean(outer)) {
226               BeanMap m = newBeanMap(outer, sType.getInnerClass());
227               o = parseIntoBean(r, m).getBean();
228            } else {
229               isValid = false;
230            }
231            skipTag(r, xTABLE);
232
233         } else if (typeName.equals("array")) {
234            if (sType.isObject())
235               o = parseTableIntoCollection(r, (Collection)new ObjectList(this), sType, pMeta);
236            else if (sType.isCollection())
237               o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
238                  ? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta);
239            else if (sType.isArray() || sType.isArgs()) {
240               ArrayList l = (ArrayList)parseTableIntoCollection(r, new ArrayList(), sType, pMeta);
241               o = toArray(sType, l);
242            }
243            else
244               isValid = false;
245            skipTag(r, xTABLE);
246
247         } else {
248            isValid = false;
249         }
250
251      } else if (tag == UL) {
252         String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array");
253         ClassMeta cm = getClassMeta(typeName, pMeta, eType);
254         if (cm != null)
255            sType = eType = cm;
256
257         if (sType.isObject())
258            o = parseIntoCollection(r, new ObjectList(this), sType, pMeta);
259         else if (sType.isCollection() || sType.isObject())
260            o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
261               ? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta);
262         else if (sType.isArray() || sType.isArgs())
263            o = toArray(sType, parseIntoCollection(r, new ArrayList(), sType, pMeta));
264         else
265            isValid = false;
266         skipTag(r, xUL);
267
268      }
269
270      if (! isValid)
271         throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType);
272
273      if (swap != null && o != null)
274         o = unswap(swap, o, eType);
275
276      if (outer != null)
277         setParent(eType, o, outer);
278
279      skipWs(r);
280      return (T)o;
281   }
282
283   /*
284    * For parsing output from HtmlDocSerializer, this skips over the head, title, and links.
285    */
286   private HtmlTag skipToData(XmlReader r) throws ParseException, XMLStreamException {
287      while (true) {
288         int event = r.next();
289         if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) {
290            r.nextTag();
291            event = r.getEventType();
292            boolean isEmpty = (event == END_ELEMENT);
293            // Skip until we find a start element, end document, or non-empty text.
294            if (! isEmpty)
295               event = skipWs(r);
296            if (event == END_DOCUMENT)
297               throw new ParseException(this, "Unexpected end of stream looking for data.");
298            return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
299         }
300      }
301   }
302
303   private static String getAttribute(XmlReader r, String name, String def) {
304      for (int i = 0; i < r.getAttributeCount(); i++)
305         if (r.getAttributeLocalName(i).equals(name))
306            return r.getAttributeValue(i);
307      return def;
308   }
309
310   /*
311    * Reads an anchor tag and converts it into a bean.
312    */
313   private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType)
314         throws IOException, ParseException, XMLStreamException {
315      String href = r.getAttributeValue(null, "href");
316      String name = getElementText(r);
317      Class<T> beanClass = beanType.getInnerClass();
318      if (beanClass.isAnnotationPresent(HtmlLink.class)) {
319         HtmlLink h = beanClass.getAnnotation(HtmlLink.class);
320         BeanMap<T> m = newBeanMap(beanClass);
321         m.put(h.uriProperty(), href);
322         m.put(h.nameProperty(), name);
323         return m.getBean();
324      }
325      return convertToType(href, beanType);
326   }
327
328   private static Map<String,String> getAttributes(XmlReader r) {
329      Map<String,String> m = new TreeMap<>() ;
330      for (int i = 0; i < r.getAttributeCount(); i++)
331         m.put(r.getAttributeLocalName(i), r.getAttributeValue(i));
332      return m;
333   }
334
335   /*
336    * Reads contents of <table> element.
337    * Precondition:  Must be pointing at <table> event.
338    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
339    */
340   private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType,
341         ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
342      while (true) {
343         HtmlTag tag = nextTag(r, TR, xTABLE);
344         if (tag == xTABLE)
345            break;
346         tag = nextTag(r, TD, TH);
347         // Skip over the column headers.
348         if (tag == TH) {
349            skipTag(r);
350            r.nextTag();
351            skipTag(r);
352         } else {
353            K key = parseAnything(keyType, r, m, false, pMeta);
354            nextTag(r, TD);
355            V value = parseAnything(valueType, r, m, false, pMeta);
356            setName(valueType, value, key);
357            m.put(key, value);
358         }
359         nextTag(r, xTR);
360      }
361
362      return m;
363   }
364
365   /*
366    * Reads contents of <ul> element.
367    * Precondition:  Must be pointing at event following <ul> event.
368    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
369    */
370   private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l,
371         ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
372      int argIndex = 0;
373      while (true) {
374         HtmlTag tag = nextTag(r, LI, xUL);
375         if (tag == xUL)
376            break;
377         ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
378         l.add((E)parseAnything(elementType, r, l, false, pMeta));
379      }
380      return l;
381   }
382
383   /*
384    * Reads contents of <ul> element.
385    * Precondition:  Must be pointing at event following <ul> event.
386    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
387    */
388   private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l,
389         ClassMeta<E> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
390
391      HtmlTag tag = nextTag(r, TR);
392      List<String> keys = new ArrayList<>();
393      while (true) {
394         tag = nextTag(r, TH, xTR);
395         if (tag == xTR)
396            break;
397         keys.add(getElementText(r));
398      }
399
400      int argIndex = 0;
401
402      while (true) {
403         r.nextTag();
404         tag = HtmlTag.forEvent(this, r);
405         if (tag == xTABLE)
406            break;
407
408         ClassMeta elementType = null;
409         String beanType = getAttribute(r, getBeanTypePropertyName(type), null);
410         if (beanType != null)
411            elementType = getClassMeta(beanType, pMeta, null);
412         if (elementType == null)
413            elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
414         if (elementType == null)
415            elementType = object();
416
417         BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this);
418
419         if (builder != null || elementType.canCreateNewBean(l)) {
420            BeanMap m =
421               builder != null
422               ? toBeanMap(builder.create(this, elementType))
423               : newBeanMap(l, elementType.getInnerClass())
424            ;
425            for (int i = 0; i < keys.size(); i++) {
426               tag = nextTag(r, TD, NULL);
427               if (tag == NULL) {
428                  m = null;
429                  nextTag(r, xNULL);
430                  break;
431               }
432               String key = keys.get(i);
433               BeanMapEntry e = m.getProperty(key);
434               if (e == null) {
435                  //onUnknownProperty(key, m, -1, -1);
436                  parseAnything(object(), r, l, false, null);
437               } else {
438                  BeanPropertyMeta bpm = e.getMeta();
439                  ClassMeta<?> cm = bpm.getClassMeta();
440                  Object value = parseAnything(cm, r, m.getBean(false), false, bpm);
441                  setName(cm, value, key);
442                  bpm.set(m, key, value);
443               }
444            }
445            l.add(
446               m == null
447               ? null
448               : builder != null
449                  ? builder.build(this, m.getBean(), elementType)
450                  : (E)m.getBean()
451            );
452         } else {
453            String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType()));
454            Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l)
455               : new ObjectMap(this));
456            for (int i = 0; i < keys.size(); i++) {
457               tag = nextTag(r, TD, NULL);
458               if (tag == NULL) {
459                  m = null;
460                  nextTag(r, xNULL);
461                  break;
462               }
463               String key = keys.get(i);
464               if (m != null) {
465                  ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType();
466                  Object value = parseAnything(vt, r, l, false, pMeta);
467                  setName(vt, value, key);
468                  m.put(convertToType(key, kt), value);
469               }
470            }
471            if (m != null && c != null) {
472               ObjectMap m2 = (m instanceof ObjectMap ? (ObjectMap)m : new ObjectMap(m).setBeanSession(this));
473               m2.put(getBeanTypePropertyName(type.getElementType()), c);
474               l.add((E)cast(m2, pMeta, elementType));
475            } else {
476               l.add((E)m);
477            }
478         }
479         nextTag(r, xTR);
480      }
481      return l;
482   }
483
484   /*
485    * Reads contents of <table> element.
486    * Precondition:  Must be pointing at event following <table> event.
487    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
488    */
489   private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws IOException, ParseException, ExecutableException, XMLStreamException {
490      while (true) {
491         HtmlTag tag = nextTag(r, TR, xTABLE);
492         if (tag == xTABLE)
493            break;
494         tag = nextTag(r, TD, TH);
495         // Skip over the column headers.
496         if (tag == TH) {
497            skipTag(r);
498            r.nextTag();
499            skipTag(r);
500         } else {
501            String key = getElementText(r);
502            nextTag(r, TD);
503            BeanPropertyMeta pMeta = m.getPropertyMeta(key);
504            if (pMeta == null) {
505               onUnknownProperty(key, m);
506               parseAnything(object(), r, null, false, null);
507            } else {
508               ClassMeta<?> cm = pMeta.getClassMeta();
509               Object value = parseAnything(cm, r, m.getBean(false), false, pMeta);
510               setName(cm, value, key);
511               pMeta.set(m, key, value);
512            }
513         }
514         nextTag(r, xTR);
515      }
516      return m;
517   }
518
519   /*
520    * Reads the next tag.  Advances past anything that's not a start or end tag.  Throws an exception if
521    *    it's not one of the expected tags.
522    * Precondition:  Must be pointing before the event we want to parse.
523    * Postcondition:  Pointing at the tag just parsed.
524    */
525   private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
526      int et = r.next();
527
528      while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT)
529         et = r.next();
530
531      if (et == END_DOCUMENT)
532         throw new ParseException(this, "Unexpected end of document.");
533
534      HtmlTag tag = HtmlTag.forEvent(this, r);
535      if (expected.length == 0)
536         return tag;
537      for (HtmlTag t : expected)
538         if (t == tag)
539            return tag;
540
541      throw new ParseException(this, "Unexpected tag: ''{0}''.  Expected one of the following: {1}", tag, expected);
542   }
543
544   /*
545    * Skips over the current element and advances to the next element.
546    * <p>
547    * Precondition:  Pointing to opening tag.
548    * Postcondition:  Pointing to next opening tag.
549    *
550    * @param r The stream being read from.
551    * @throws XMLStreamException
552    */
553   private void skipTag(XmlReader r) throws ParseException, XMLStreamException {
554      int et = r.getEventType();
555
556      if (et != START_ELEMENT)
557         throw new ParseException(this,
558            "skipToNextTag() call on invalid event ''{0}''.  Must only be called on START_ELEMENT events.",
559            XmlUtils.toReadableEvent(r)
560         );
561
562      String n = r.getLocalName();
563
564      int depth = 0;
565      while (true) {
566         et = r.next();
567         if (et == START_ELEMENT) {
568            String n2 = r.getLocalName();
569               if (n.equals(n2))
570            depth++;
571         } else if (et == END_ELEMENT) {
572            String n2 = r.getLocalName();
573            if (n.equals(n2))
574               depth--;
575            if (depth < 0)
576               return;
577         }
578      }
579   }
580
581   private void skipTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
582      HtmlTag tag = HtmlTag.forEvent(this, r);
583      if (tag.isOneOf(expected))
584         r.next();
585      else
586         throw new ParseException(this,
587            "Unexpected tag: ''{0}''.  Expected one of the following: {1}",
588            tag, expected);
589   }
590
591   private static int skipWs(XmlReader r)  throws XMLStreamException {
592      int event = r.getEventType();
593      while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace())
594         event = r.next();
595      return event;
596   }
597
598   /**
599    * Parses CHARACTERS data.
600    *
601    * <p>
602    * Precondition:  Pointing to event immediately following opening tag.
603    * Postcondition:  Pointing to closing tag.
604    *
605    * @param r The stream being read from.
606    * @return The parsed string.
607    * @throws XMLStreamException Thrown by underlying XML stream.
608    */
609   @Override /* XmlParserSession */
610   protected final String parseText(XmlReader r) throws IOException, ParseException, XMLStreamException {
611
612      StringBuilder sb = getStringBuilder();
613
614      int et = r.getEventType();
615      if (et == END_ELEMENT)
616         return "";
617
618      int depth = 0;
619
620      String characters = null;
621
622      while (true) {
623         if (et == START_ELEMENT) {
624            if (characters != null) {
625               if (sb.length() == 0)
626                  characters = trimStart(characters);
627               sb.append(characters);
628               characters = null;
629            }
630            HtmlTag tag = HtmlTag.forEvent(this, r);
631            if (tag == BR) {
632               sb.append('\n');
633               r.nextTag();
634            } else if (tag == BS) {
635               sb.append('\b');
636               r.nextTag();
637            } else if (tag == SP) {
638               et = r.next();
639               if (et == CHARACTERS) {
640                  String s = r.getText();
641                  if (s.length() > 0) {
642                     char c = r.getText().charAt(0);
643                     if (c == '\u2003')
644                        c = '\t';
645                     sb.append(c);
646                  }
647                  r.nextTag();
648               }
649            } else if (tag == FF) {
650               sb.append('\f');
651               r.nextTag();
652            } else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) {
653               et = r.next();
654               if (et == CHARACTERS) {
655                  sb.append(r.getText());
656                  r.nextTag();
657               }
658            } else {
659               sb.append('<').append(r.getLocalName());
660               for (int i = 0; i < r.getAttributeCount(); i++)
661                  sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\'');
662               sb.append('>');
663               depth++;
664            }
665         } else if (et == END_ELEMENT) {
666            if (characters != null) {
667               if (sb.length() == 0)
668                  characters = trimStart(characters);
669               if (depth == 0)
670                  characters = trimEnd(characters);
671               sb.append(characters);
672               characters = null;
673            }
674            if (depth == 0)
675               break;
676            sb.append('<').append(r.getLocalName()).append('>');
677            depth--;
678         } else if (et == CHARACTERS) {
679            characters = r.getText();
680         }
681         et = r.next();
682      }
683
684      String s = trim(sb.toString());
685      returnStringBuilder(sb);
686      return s;
687   }
688
   /**
    * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag.
    *
    * <p>
    * Advances the reader by one event and then delegates to {@link #parseText(XmlReader)}.
    *
    * <p>
    * Precondition:  Pointing to opening tag.
    * Postcondition:  Pointing to closing tag.
    *
    * @param r The stream being read from.
    * @return The parsed string.
    * @throws XMLStreamException Thrown by underlying XML stream.
    * @throws ParseException Malformed input encountered.
    */
   @Override /* XmlParserSession */
   protected final String getElementText(XmlReader r) throws IOException, XMLStreamException, ParseException {
      r.next();
      return parseText(r);
   }
706
707   @Override /* XmlParserSession */
708   protected final boolean isWhitespaceElement(XmlReader r) {
709      String s = r.getLocalName();
710      return whitespaceElements.contains(s);
711   }
712
713   @Override /* XmlParserSession */
714   protected final String parseWhitespaceElement(XmlReader r) throws IOException, ParseException, XMLStreamException {
715
716      HtmlTag tag = HtmlTag.forEvent(this, r);
717      int et = r.next();
718      if (tag == BR) {
719         return "\n";
720      } else if (tag == BS) {
721         return "\b";
722      } else if (tag == FF) {
723         return "\f";
724      } else if (tag == SP) {
725         if (et == CHARACTERS) {
726            String s = r.getText();
727            if (s.charAt(0) == '\u2003')
728               s = "\t";
729            r.next();
730            return decodeString(s);
731         }
732         return "";
733      } else {
734         throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag);
735      }
736   }
737
738   //-----------------------------------------------------------------------------------------------------------------
739   // Other methods
740   //-----------------------------------------------------------------------------------------------------------------
741
742   @Override /* Session */
743   public ObjectMap toMap() {
744      return super.toMap()
745         .append("HtmlParserSession", new DefaultFilteringObjectMap()
746         );
747   }
748}