001// ***************************************************************************************************************************
002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
003// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
005// * with the License.  You may obtain a copy of the License at                                                              *
006// *                                                                                                                         *
007// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
008// *                                                                                                                         *
009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
011// * specific language governing permissions and limitations under the License.                                              *
012// ***************************************************************************************************************************
013package org.apache.juneau.html;
014
015import static javax.xml.stream.XMLStreamConstants.*;
016import static org.apache.juneau.html.HtmlTag.*;
017import static org.apache.juneau.internal.StringUtils.*;
018
019import java.io.IOException;
020import java.lang.reflect.*;
021import java.util.*;
022
023import javax.xml.stream.*;
024
025import org.apache.juneau.*;
026import org.apache.juneau.html.annotation.*;
027import org.apache.juneau.parser.*;
028import org.apache.juneau.transform.*;
029import org.apache.juneau.xml.*;
030
031/**
032 * Session object that lives for the duration of a single use of {@link HtmlParser}.
033 *
034 * <p>
035 * This class is NOT thread safe.
036 * It is typically discarded after one-time use although it can be reused against multiple inputs.
037 */
038@SuppressWarnings({ "unchecked", "rawtypes" })
039public final class HtmlParserSession extends XmlParserSession {
040
041   private static final Set<String> whitespaceElements = new HashSet<>(
042      Arrays.asList(
043         new String[]{"br","bs","sp","ff"}
044      )
045   );
046
047   private final HtmlParser ctx;
048
049   /**
050    * Create a new session using properties specified in the context.
051    *
052    * @param ctx
053    *    The context creating this session object.
054    *    The context contains all the configuration settings for this object.
055    * @param args
056    *    Runtime session arguments.
057    */
058   protected HtmlParserSession(HtmlParser ctx, ParserSessionArgs args) {
059      super(ctx, args);
060      this.ctx = ctx;
061   }
062
063   @Override /* ParserSession */
064   protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException {
065      try {
066         return parseAnything(type, getXmlReader(pipe), getOuter(), true, null);
067      } catch (XMLStreamException e) {
068         throw new ParseException(e);
069      }
070   }
071
072   @Override /* ReaderParserSession */
073   protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType)
074         throws Exception {
075      return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType),
076         (ClassMeta<V>)getClassMeta(valueType), null);
077   }
078
079   @Override /* ReaderParserSession */
080   protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType)
081         throws Exception {
082      return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null);
083   }
084
085   /*
086    * Reads anything starting at the current event.
087    * <p>
088    * Precondition:  Must be pointing at outer START_ELEMENT.
089    * Postcondition:  Pointing at outer END_ELEMENT.
090    */
091   private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
092
093      if (eType == null)
094         eType = (ClassMeta<T>)object();
095      PojoSwap<T,Object> swap = (PojoSwap<T,Object>)eType.getPojoSwap(this);
096      BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this);
097      ClassMeta<?> sType = null;
098      if (builder != null)
099         sType = builder.getBuilderClassMeta(this);
100      else if (swap != null)
101         sType = swap.getSwapClassMeta(this);
102      else
103         sType = eType;
104
105      if (sType.isOptional())
106         return (T)Optional.ofNullable(parseAnything(eType.getElementType(), r, outer, isRoot, pMeta));
107
108      setCurrentClass(sType);
109
110      int event = r.getEventType();
111      if (event != START_ELEMENT)
112         throw new ParseException(this, "parseAnything must be called on outer start element.");
113
114      if (! isRoot)
115         event = r.next();
116      boolean isEmpty = (event == END_ELEMENT);
117
118      // Skip until we find a start element, end document, or non-empty text.
119      if (! isEmpty)
120         event = skipWs(r);
121
122      if (event == END_DOCUMENT)
123         throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType);
124
125      // Handle @Html(asXml=true) beans.
126      HtmlClassMeta hcm = getHtmlClassMeta(sType);
127      if (hcm.getFormat() == HtmlFormat.XML)
128         return super.parseAnything(eType, null, r, outer, false, pMeta);
129
130      Object o = null;
131
132      boolean isValid = true;
133      HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
134
135      // If it's not a known tag, then parse it as XML.
136      // Allows us to parse stuff like "<div/>" into HTML5 beans.
137      if (tag == null && event != CHARACTERS)
138         return super.parseAnything(eType, null, r, outer, false, pMeta);
139
140      if (tag == HTML)
141         tag = skipToData(r);
142
143      if (isEmpty) {
144         o = "";
145      } else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) {
146         String text = parseText(r);
147         if (sType.isObject() || sType.isCharSequence())
148            o = text;
149         else if (sType.isChar())
150            o = parseCharacter(text);
151         else if (sType.isBoolean())
152            o = Boolean.parseBoolean(text);
153         else if (sType.isNumber())
154            o = parseNumber(text, (Class<? extends Number>)eType.getInnerClass());
155         else if (sType.canCreateNewInstanceFromString(outer))
156            o = sType.newInstanceFromString(outer, text);
157         else
158            isValid = false;
159
160      } else if (tag == STRING || (tag == A && pMeta != null && getHtmlBeanPropertyMeta(pMeta).getLink() != null)) {
161         String text = getElementText(r);
162         if (sType.isObject() || sType.isCharSequence())
163            o = text;
164         else if (sType.isChar())
165            o = parseCharacter(text);
166         else if (sType.canCreateNewInstanceFromString(outer))
167            o = sType.newInstanceFromString(outer, text);
168         else
169            isValid = false;
170         skipTag(r, tag == STRING ? xSTRING : xA);
171
172      } else if (tag == NUMBER) {
173         String text = getElementText(r);
174         if (sType.isObject())
175            o = parseNumber(text, Number.class);
176         else if (sType.isNumber())
177            o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass());
178         else
179            isValid = false;
180         skipTag(r, xNUMBER);
181
182      } else if (tag == BOOLEAN) {
183         String text = getElementText(r);
184         if (sType.isObject() || sType.isBoolean())
185            o = Boolean.parseBoolean(text);
186         else
187            isValid = false;
188         skipTag(r, xBOOLEAN);
189
190      } else if (tag == P) {
191         String text = getElementText(r);
192         if (! "No Results".equals(text))
193            isValid = false;
194         skipTag(r, xP);
195
196      } else if (tag == NULL) {
197         skipTag(r, NULL);
198         skipTag(r, xNULL);
199
200      } else if (tag == A) {
201         o = parseAnchor(r, eType);
202         skipTag(r, xA);
203
204      } else if (tag == TABLE) {
205
206         String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object");
207         ClassMeta cm = getClassMeta(typeName, pMeta, eType);
208
209         if (cm != null) {
210            sType = eType = cm;
211            typeName = sType.isCollectionOrArray() ? "array" : "object";
212         } else if (! "array".equals(typeName)) {
213            // Type name could be a subtype name.
214            typeName = sType.isCollectionOrArray() ? "array" : "object";
215         }
216
217         if (typeName.equals("object")) {
218            if (sType.isObject()) {
219               o = parseIntoMap(r, (Map)new ObjectMap(this), sType.getKeyType(), sType.getValueType(),
220                  pMeta);
221            } else if (sType.isMap()) {
222               o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer)
223                  : new ObjectMap(this)), sType.getKeyType(), sType.getValueType(), pMeta);
224            } else if (builder != null) {
225               BeanMap m = toBeanMap(builder.create(this, eType));
226               o = builder.build(this, parseIntoBean(r, m).getBean(), eType);
227            } else if (sType.canCreateNewBean(outer)) {
228               BeanMap m = newBeanMap(outer, sType.getInnerClass());
229               o = parseIntoBean(r, m).getBean();
230            } else {
231               isValid = false;
232            }
233            skipTag(r, xTABLE);
234
235         } else if (typeName.equals("array")) {
236            if (sType.isObject())
237               o = parseTableIntoCollection(r, (Collection)new ObjectList(this), sType, pMeta);
238            else if (sType.isCollection())
239               o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
240                  ? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta);
241            else if (sType.isArray() || sType.isArgs()) {
242               ArrayList l = (ArrayList)parseTableIntoCollection(r, new ArrayList(), sType, pMeta);
243               o = toArray(sType, l);
244            }
245            else
246               isValid = false;
247            skipTag(r, xTABLE);
248
249         } else {
250            isValid = false;
251         }
252
253      } else if (tag == UL) {
254         String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array");
255         ClassMeta cm = getClassMeta(typeName, pMeta, eType);
256         if (cm != null)
257            sType = eType = cm;
258
259         if (sType.isObject())
260            o = parseIntoCollection(r, new ObjectList(this), sType, pMeta);
261         else if (sType.isCollection() || sType.isObject())
262            o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
263               ? sType.newInstance(outer) : new ObjectList(this)), sType, pMeta);
264         else if (sType.isArray() || sType.isArgs())
265            o = toArray(sType, parseIntoCollection(r, new ArrayList(), sType, pMeta));
266         else
267            isValid = false;
268         skipTag(r, xUL);
269
270      }
271
272      if (! isValid)
273         throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType);
274
275      if (swap != null && o != null)
276         o = unswap(swap, o, eType);
277
278      if (outer != null)
279         setParent(eType, o, outer);
280
281      skipWs(r);
282      return (T)o;
283   }
284
285   /*
286    * For parsing output from HtmlDocSerializer, this skips over the head, title, and links.
287    */
288   private HtmlTag skipToData(XmlReader r) throws ParseException, XMLStreamException {
289      while (true) {
290         int event = r.next();
291         if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) {
292            r.nextTag();
293            event = r.getEventType();
294            boolean isEmpty = (event == END_ELEMENT);
295            // Skip until we find a start element, end document, or non-empty text.
296            if (! isEmpty)
297               event = skipWs(r);
298            if (event == END_DOCUMENT)
299               throw new ParseException(this, "Unexpected end of stream looking for data.");
300            return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
301         }
302      }
303   }
304
305   private static String getAttribute(XmlReader r, String name, String def) {
306      for (int i = 0; i < r.getAttributeCount(); i++)
307         if (r.getAttributeLocalName(i).equals(name))
308            return r.getAttributeValue(i);
309      return def;
310   }
311
312   /*
313    * Reads an anchor tag and converts it into a bean.
314    */
315   private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType)
316         throws IOException, ParseException, XMLStreamException {
317      String href = r.getAttributeValue(null, "href");
318      String name = getElementText(r);
319      if (beanType.hasAnnotation(HtmlLink.class)) {
320         HtmlLink h = beanType.getAnnotation(HtmlLink.class);
321         BeanMap<T> m = newBeanMap(beanType.getInnerClass());
322         m.put(h.uriProperty(), href);
323         m.put(h.nameProperty(), name);
324         return m.getBean();
325      }
326      return convertToType(href, beanType);
327   }
328
329   private static Map<String,String> getAttributes(XmlReader r) {
330      Map<String,String> m = new TreeMap<>() ;
331      for (int i = 0; i < r.getAttributeCount(); i++)
332         m.put(r.getAttributeLocalName(i), r.getAttributeValue(i));
333      return m;
334   }
335
336   /*
337    * Reads contents of <table> element.
338    * Precondition:  Must be pointing at <table> event.
339    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
340    */
341   private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType,
342         ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
343      while (true) {
344         HtmlTag tag = nextTag(r, TR, xTABLE);
345         if (tag == xTABLE)
346            break;
347         tag = nextTag(r, TD, TH);
348         // Skip over the column headers.
349         if (tag == TH) {
350            skipTag(r);
351            r.nextTag();
352            skipTag(r);
353         } else {
354            K key = parseAnything(keyType, r, m, false, pMeta);
355            nextTag(r, TD);
356            V value = parseAnything(valueType, r, m, false, pMeta);
357            setName(valueType, value, key);
358            m.put(key, value);
359         }
360         nextTag(r, xTR);
361      }
362
363      return m;
364   }
365
366   /*
367    * Reads contents of <ul> element.
368    * Precondition:  Must be pointing at event following <ul> event.
369    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
370    */
371   private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l,
372         ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
373      int argIndex = 0;
374      while (true) {
375         HtmlTag tag = nextTag(r, LI, xUL);
376         if (tag == xUL)
377            break;
378         ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
379         l.add((E)parseAnything(elementType, r, l, false, pMeta));
380      }
381      return l;
382   }
383
384   /*
385    * Reads contents of <ul> element.
386    * Precondition:  Must be pointing at event following <ul> event.
387    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
388    */
389   private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l,
390         ClassMeta<E> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
391
392      HtmlTag tag = nextTag(r, TR);
393      List<String> keys = new ArrayList<>();
394      while (true) {
395         tag = nextTag(r, TH, xTR);
396         if (tag == xTR)
397            break;
398         keys.add(getElementText(r));
399      }
400
401      int argIndex = 0;
402
403      while (true) {
404         r.nextTag();
405         tag = HtmlTag.forEvent(this, r);
406         if (tag == xTABLE)
407            break;
408
409         ClassMeta elementType = null;
410         String beanType = getAttribute(r, getBeanTypePropertyName(type), null);
411         if (beanType != null)
412            elementType = getClassMeta(beanType, pMeta, null);
413         if (elementType == null)
414            elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
415         if (elementType == null)
416            elementType = object();
417
418         BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this);
419
420         if (builder != null || elementType.canCreateNewBean(l)) {
421            BeanMap m =
422               builder != null
423               ? toBeanMap(builder.create(this, elementType))
424               : newBeanMap(l, elementType.getInnerClass())
425            ;
426            for (int i = 0; i < keys.size(); i++) {
427               tag = nextTag(r, TD, NULL);
428               if (tag == NULL) {
429                  m = null;
430                  nextTag(r, xNULL);
431                  break;
432               }
433               String key = keys.get(i);
434               BeanMapEntry e = m.getProperty(key);
435               if (e == null) {
436                  //onUnknownProperty(key, m, -1, -1);
437                  parseAnything(object(), r, l, false, null);
438               } else {
439                  BeanPropertyMeta bpm = e.getMeta();
440                  ClassMeta<?> cm = bpm.getClassMeta();
441                  Object value = parseAnything(cm, r, m.getBean(false), false, bpm);
442                  setName(cm, value, key);
443                  bpm.set(m, key, value);
444               }
445            }
446            l.add(
447               m == null
448               ? null
449               : builder != null
450                  ? builder.build(this, m.getBean(), elementType)
451                  : (E)m.getBean()
452            );
453         } else {
454            String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType()));
455            Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l)
456               : new ObjectMap(this));
457            for (int i = 0; i < keys.size(); i++) {
458               tag = nextTag(r, TD, NULL);
459               if (tag == NULL) {
460                  m = null;
461                  nextTag(r, xNULL);
462                  break;
463               }
464               String key = keys.get(i);
465               if (m != null) {
466                  ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType();
467                  Object value = parseAnything(vt, r, l, false, pMeta);
468                  setName(vt, value, key);
469                  m.put(convertToType(key, kt), value);
470               }
471            }
472            if (m != null && c != null) {
473               ObjectMap m2 = (m instanceof ObjectMap ? (ObjectMap)m : new ObjectMap(m).setBeanSession(this));
474               m2.put(getBeanTypePropertyName(type.getElementType()), c);
475               l.add((E)cast(m2, pMeta, elementType));
476            } else {
477               l.add((E)m);
478            }
479         }
480         nextTag(r, xTR);
481      }
482      return l;
483   }
484
485   /*
486    * Reads contents of <table> element.
487    * Precondition:  Must be pointing at event following <table> event.
488    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
489    */
490   private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws IOException, ParseException, ExecutableException, XMLStreamException {
491      while (true) {
492         HtmlTag tag = nextTag(r, TR, xTABLE);
493         if (tag == xTABLE)
494            break;
495         tag = nextTag(r, TD, TH);
496         // Skip over the column headers.
497         if (tag == TH) {
498            skipTag(r);
499            r.nextTag();
500            skipTag(r);
501         } else {
502            String key = getElementText(r);
503            nextTag(r, TD);
504            BeanPropertyMeta pMeta = m.getPropertyMeta(key);
505            if (pMeta == null) {
506               onUnknownProperty(key, m);
507               parseAnything(object(), r, null, false, null);
508            } else {
509               ClassMeta<?> cm = pMeta.getClassMeta();
510               Object value = parseAnything(cm, r, m.getBean(false), false, pMeta);
511               setName(cm, value, key);
512               pMeta.set(m, key, value);
513            }
514         }
515         HtmlTag t = nextTag(r, xTD, xTR);
516         if (t == xTD)
517            nextTag(r, xTR);
518      }
519      return m;
520   }
521
522   /*
523    * Reads the next tag.  Advances past anything that's not a start or end tag.  Throws an exception if
524    *    it's not one of the expected tags.
525    * Precondition:  Must be pointing before the event we want to parse.
526    * Postcondition:  Pointing at the tag just parsed.
527    */
528   private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
529      int et = r.next();
530
531      while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT)
532         et = r.next();
533
534      if (et == END_DOCUMENT)
535         throw new ParseException(this, "Unexpected end of document.");
536
537      HtmlTag tag = HtmlTag.forEvent(this, r);
538      if (expected.length == 0)
539         return tag;
540      for (HtmlTag t : expected)
541         if (t == tag)
542            return tag;
543
544      throw new ParseException(this, "Unexpected tag: ''{0}''.  Expected one of the following: {1}", tag, expected);
545   }
546
547   /*
548    * Skips over the current element and advances to the next element.
549    * <p>
550    * Precondition:  Pointing to opening tag.
551    * Postcondition:  Pointing to next opening tag.
552    *
553    * @param r The stream being read from.
554    * @throws XMLStreamException
555    */
556   private void skipTag(XmlReader r) throws ParseException, XMLStreamException {
557      int et = r.getEventType();
558
559      if (et != START_ELEMENT)
560         throw new ParseException(this,
561            "skipToNextTag() call on invalid event ''{0}''.  Must only be called on START_ELEMENT events.",
562            XmlUtils.toReadableEvent(r)
563         );
564
565      String n = r.getLocalName();
566
567      int depth = 0;
568      while (true) {
569         et = r.next();
570         if (et == START_ELEMENT) {
571            String n2 = r.getLocalName();
572               if (n.equals(n2))
573            depth++;
574         } else if (et == END_ELEMENT) {
575            String n2 = r.getLocalName();
576            if (n.equals(n2))
577               depth--;
578            if (depth < 0)
579               return;
580         }
581      }
582   }
583
584   private void skipTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
585      HtmlTag tag = HtmlTag.forEvent(this, r);
586      if (tag.isOneOf(expected))
587         r.next();
588      else
589         throw new ParseException(this,
590            "Unexpected tag: ''{0}''.  Expected one of the following: {1}",
591            tag, expected);
592   }
593
594   private static int skipWs(XmlReader r)  throws XMLStreamException {
595      int event = r.getEventType();
596      while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace())
597         event = r.next();
598      return event;
599   }
600
601   /**
602    * Parses CHARACTERS data.
603    *
604    * <p>
605    * Precondition:  Pointing to event immediately following opening tag.
606    * Postcondition:  Pointing to closing tag.
607    *
608    * @param r The stream being read from.
609    * @return The parsed string.
610    * @throws XMLStreamException Thrown by underlying XML stream.
611    */
612   @Override /* XmlParserSession */
613   protected final String parseText(XmlReader r) throws IOException, ParseException, XMLStreamException {
614
615      StringBuilder sb = getStringBuilder();
616
617      int et = r.getEventType();
618      if (et == END_ELEMENT)
619         return "";
620
621      int depth = 0;
622
623      String characters = null;
624
625      while (true) {
626         if (et == START_ELEMENT) {
627            if (characters != null) {
628               if (sb.length() == 0)
629                  characters = trimStart(characters);
630               sb.append(characters);
631               characters = null;
632            }
633            HtmlTag tag = HtmlTag.forEvent(this, r);
634            if (tag == BR) {
635               sb.append('\n');
636               r.nextTag();
637            } else if (tag == BS) {
638               sb.append('\b');
639               r.nextTag();
640            } else if (tag == SP) {
641               et = r.next();
642               if (et == CHARACTERS) {
643                  String s = r.getText();
644                  if (s.length() > 0) {
645                     char c = r.getText().charAt(0);
646                     if (c == '\u2003')
647                        c = '\t';
648                     sb.append(c);
649                  }
650                  r.nextTag();
651               }
652            } else if (tag == FF) {
653               sb.append('\f');
654               r.nextTag();
655            } else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) {
656               et = r.next();
657               if (et == CHARACTERS) {
658                  sb.append(r.getText());
659                  r.nextTag();
660               }
661            } else {
662               sb.append('<').append(r.getLocalName());
663               for (int i = 0; i < r.getAttributeCount(); i++)
664                  sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\'');
665               sb.append('>');
666               depth++;
667            }
668         } else if (et == END_ELEMENT) {
669            if (characters != null) {
670               if (sb.length() == 0)
671                  characters = trimStart(characters);
672               if (depth == 0)
673                  characters = trimEnd(characters);
674               sb.append(characters);
675               characters = null;
676            }
677            if (depth == 0)
678               break;
679            sb.append('<').append(r.getLocalName()).append('>');
680            depth--;
681         } else if (et == CHARACTERS) {
682            characters = r.getText();
683         }
684         et = r.next();
685      }
686
687      String s = trim(sb.toString());
688      returnStringBuilder(sb);
689      return s;
690   }
691
692   /**
693    * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag.
694    *
695    * <p>
696    * Precondition:  Pointing to opening tag.
697    * Postcondition:  Pointing to closing tag.
698    *
699    * @param r The stream being read from.
700    * @return The parsed string.
701    * @throws XMLStreamException Thrown by underlying XML stream.
702    * @throws ParseException Malformed input encountered.
703    */
704   @Override /* XmlParserSession */
705   protected final String getElementText(XmlReader r) throws IOException, XMLStreamException, ParseException {
706      r.next();
707      return parseText(r);
708   }
709
710   @Override /* XmlParserSession */
711   protected final boolean isWhitespaceElement(XmlReader r) {
712      String s = r.getLocalName();
713      return whitespaceElements.contains(s);
714   }
715
716   @Override /* XmlParserSession */
717   protected final String parseWhitespaceElement(XmlReader r) throws IOException, ParseException, XMLStreamException {
718
719      HtmlTag tag = HtmlTag.forEvent(this, r);
720      int et = r.next();
721      if (tag == BR) {
722         return "\n";
723      } else if (tag == BS) {
724         return "\b";
725      } else if (tag == FF) {
726         return "\f";
727      } else if (tag == SP) {
728         if (et == CHARACTERS) {
729            String s = r.getText();
730            if (s.charAt(0) == '\u2003')
731               s = "\t";
732            r.next();
733            return decodeString(s);
734         }
735         return "";
736      } else {
737         throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag);
738      }
739   }
740
741   //-----------------------------------------------------------------------------------------------------------------
742   // Extended metadata
743   //-----------------------------------------------------------------------------------------------------------------
744
745   /**
746    * Returns the language-specific metadata on the specified class.
747    *
748    * @param cm The class to return the metadata on.
749    * @return The metadata.
750    */
751   protected HtmlClassMeta getHtmlClassMeta(ClassMeta<?> cm) {
752      return ctx.getHtmlClassMeta(cm);
753   }
754
755   /**
756    * Returns the language-specific metadata on the specified bean property.
757    *
758    * @param bpm The bean property to return the metadata on.
759    * @return The metadata.
760    */
761   protected HtmlBeanPropertyMeta getHtmlBeanPropertyMeta(BeanPropertyMeta bpm) {
762      return ctx.getHtmlBeanPropertyMeta(bpm);
763   }
764
765   //-----------------------------------------------------------------------------------------------------------------
766   // Other methods
767   //-----------------------------------------------------------------------------------------------------------------
768
769   @Override /* Session */
770   public ObjectMap toMap() {
771      return super.toMap()
772         .append("HtmlParserSession", new DefaultFilteringObjectMap()
773         );
774   }
775}