001// ***************************************************************************************************************************
002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
003// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
005// * with the License.  You may obtain a copy of the License at                                                              *
006// *                                                                                                                         *
007// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
008// *                                                                                                                         *
009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
011// * specific language governing permissions and limitations under the License.                                              *
012// ***************************************************************************************************************************
013package org.apache.juneau.html;
014
015import static javax.xml.stream.XMLStreamConstants.*;
016import static org.apache.juneau.html.HtmlTag.*;
017import static org.apache.juneau.internal.StringUtils.*;
018
019import java.io.IOException;
020import java.lang.reflect.*;
021import java.util.*;
022
023import javax.xml.stream.*;
024
025import org.apache.juneau.*;
026import org.apache.juneau.collections.*;
027import org.apache.juneau.html.annotation.*;
028import org.apache.juneau.parser.*;
029import org.apache.juneau.transform.*;
030import org.apache.juneau.xml.*;
031
032/**
033 * Session object that lives for the duration of a single use of {@link HtmlParser}.
034 *
035 * <p>
036 * This class is NOT thread safe.
037 * It is typically discarded after one-time use although it can be reused against multiple inputs.
038 */
039@SuppressWarnings({ "unchecked", "rawtypes" })
040public final class HtmlParserSession extends XmlParserSession {
041
042   private static final Set<String> whitespaceElements = ASet.of("br","bs","sp","ff");
043
044   private final HtmlParser ctx;
045
046   /**
047    * Create a new session using properties specified in the context.
048    *
049    * @param ctx
050    *    The context creating this session object.
051    *    The context contains all the configuration settings for this object.
052    * @param args
053    *    Runtime session arguments.
054    */
055   protected HtmlParserSession(HtmlParser ctx, ParserSessionArgs args) {
056      super(ctx, args);
057      this.ctx = ctx;
058   }
059
060   @Override /* ParserSession */
061   protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException {
062      try {
063         return parseAnything(type, getXmlReader(pipe), getOuter(), true, null);
064      } catch (XMLStreamException e) {
065         throw new ParseException(e);
066      }
067   }
068
069   @Override /* ReaderParserSession */
070   protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType)
071         throws Exception {
072      return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType),
073         (ClassMeta<V>)getClassMeta(valueType), null);
074   }
075
076   @Override /* ReaderParserSession */
077   protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType)
078         throws Exception {
079      return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null);
080   }
081
082   /*
083    * Reads anything starting at the current event.
084    * <p>
085    * Precondition:  Must be pointing at outer START_ELEMENT.
086    * Postcondition:  Pointing at outer END_ELEMENT.
087    */
088   private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
089
090      if (eType == null)
091         eType = (ClassMeta<T>)object();
092      PojoSwap<T,Object> swap = (PojoSwap<T,Object>)eType.getSwap(this);
093      BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this);
094      ClassMeta<?> sType = null;
095      if (builder != null)
096         sType = builder.getBuilderClassMeta(this);
097      else if (swap != null)
098         sType = swap.getSwapClassMeta(this);
099      else
100         sType = eType;
101
102      if (sType.isOptional())
103         return (T)Optional.ofNullable(parseAnything(eType.getElementType(), r, outer, isRoot, pMeta));
104
105      setCurrentClass(sType);
106
107      int event = r.getEventType();
108      if (event != START_ELEMENT)
109         throw new ParseException(this, "parseAnything must be called on outer start element.");
110
111      if (! isRoot)
112         event = r.next();
113      boolean isEmpty = (event == END_ELEMENT);
114
115      // Skip until we find a start element, end document, or non-empty text.
116      if (! isEmpty)
117         event = skipWs(r);
118
119      if (event == END_DOCUMENT)
120         throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType);
121
122      // Handle @Html(asXml=true) beans.
123      HtmlClassMeta hcm = getHtmlClassMeta(sType);
124      if (hcm.getFormat() == HtmlFormat.XML)
125         return super.parseAnything(eType, null, r, outer, false, pMeta);
126
127      Object o = null;
128
129      boolean isValid = true;
130      HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
131
132      // If it's not a known tag, then parse it as XML.
133      // Allows us to parse stuff like "<div/>" into HTML5 beans.
134      if (tag == null && event != CHARACTERS)
135         return super.parseAnything(eType, null, r, outer, false, pMeta);
136
137      if (tag == HTML)
138         tag = skipToData(r);
139
140      if (isEmpty) {
141         o = "";
142      } else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) {
143         String text = parseText(r);
144         if (sType.isObject() || sType.isCharSequence())
145            o = text;
146         else if (sType.isChar())
147            o = parseCharacter(text);
148         else if (sType.isBoolean())
149            o = Boolean.parseBoolean(text);
150         else if (sType.isNumber())
151            o = parseNumber(text, (Class<? extends Number>)eType.getInnerClass());
152         else if (sType.canCreateNewInstanceFromString(outer))
153            o = sType.newInstanceFromString(outer, text);
154         else
155            isValid = false;
156
157      } else if (tag == STRING || (tag == A && pMeta != null && getHtmlBeanPropertyMeta(pMeta).getLink() != null)) {
158         String text = getElementText(r);
159         if (sType.isObject() || sType.isCharSequence())
160            o = text;
161         else if (sType.isChar())
162            o = parseCharacter(text);
163         else if (sType.canCreateNewInstanceFromString(outer))
164            o = sType.newInstanceFromString(outer, text);
165         else
166            isValid = false;
167         skipTag(r, tag == STRING ? xSTRING : xA);
168
169      } else if (tag == NUMBER) {
170         String text = getElementText(r);
171         if (sType.isObject())
172            o = parseNumber(text, Number.class);
173         else if (sType.isNumber())
174            o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass());
175         else
176            isValid = false;
177         skipTag(r, xNUMBER);
178
179      } else if (tag == BOOLEAN) {
180         String text = getElementText(r);
181         if (sType.isObject() || sType.isBoolean())
182            o = Boolean.parseBoolean(text);
183         else
184            isValid = false;
185         skipTag(r, xBOOLEAN);
186
187      } else if (tag == P) {
188         String text = getElementText(r);
189         if (! "No Results".equals(text))
190            isValid = false;
191         skipTag(r, xP);
192
193      } else if (tag == NULL) {
194         skipTag(r, NULL);
195         skipTag(r, xNULL);
196
197      } else if (tag == A) {
198         o = parseAnchor(r, eType);
199         skipTag(r, xA);
200
201      } else if (tag == TABLE) {
202
203         String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object");
204         ClassMeta cm = getClassMeta(typeName, pMeta, eType);
205
206         if (cm != null) {
207            sType = eType = cm;
208            typeName = sType.isCollectionOrArray() ? "array" : "object";
209         } else if (! "array".equals(typeName)) {
210            // Type name could be a subtype name.
211            typeName = sType.isCollectionOrArray() ? "array" : "object";
212         }
213
214         if (typeName.equals("object")) {
215            if (sType.isObject()) {
216               o = parseIntoMap(r, (Map)new OMap(this), sType.getKeyType(), sType.getValueType(),
217                  pMeta);
218            } else if (sType.isMap()) {
219               o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer)
220                  : new OMap(this)), sType.getKeyType(), sType.getValueType(), pMeta);
221            } else if (builder != null) {
222               BeanMap m = toBeanMap(builder.create(this, eType));
223               o = builder.build(this, parseIntoBean(r, m).getBean(), eType);
224            } else if (sType.canCreateNewBean(outer)) {
225               BeanMap m = newBeanMap(outer, sType.getInnerClass());
226               o = parseIntoBean(r, m).getBean();
227            } else if (sType.getProxyInvocationHandler() != null) {
228               BeanMap m = newBeanMap(outer, sType.getInnerClass());
229               o = parseIntoBean(r, m).getBean();
230            } else {
231               isValid = false;
232            }
233            skipTag(r, xTABLE);
234
235         } else if (typeName.equals("array")) {
236            if (sType.isObject())
237               o = parseTableIntoCollection(r, (Collection)new OList(this), sType, pMeta);
238            else if (sType.isCollection())
239               o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
240                  ? sType.newInstance(outer) : new OList(this)), sType, pMeta);
241            else if (sType.isArray() || sType.isArgs()) {
242               ArrayList l = (ArrayList)parseTableIntoCollection(r, new ArrayList(), sType, pMeta);
243               o = toArray(sType, l);
244            }
245            else
246               isValid = false;
247            skipTag(r, xTABLE);
248
249         } else {
250            isValid = false;
251         }
252
253      } else if (tag == UL) {
254         String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array");
255         ClassMeta cm = getClassMeta(typeName, pMeta, eType);
256         if (cm != null)
257            sType = eType = cm;
258
259         if (sType.isObject())
260            o = parseIntoCollection(r, new OList(this), sType, pMeta);
261         else if (sType.isCollection() || sType.isObject())
262            o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
263               ? sType.newInstance(outer) : new OList(this)), sType, pMeta);
264         else if (sType.isArray() || sType.isArgs())
265            o = toArray(sType, parseIntoCollection(r, new ArrayList(), sType, pMeta));
266         else
267            isValid = false;
268         skipTag(r, xUL);
269
270      }
271
272      if (! isValid)
273         throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType);
274
275      if (swap != null && o != null)
276         o = unswap(swap, o, eType);
277
278      if (outer != null)
279         setParent(eType, o, outer);
280
281      skipWs(r);
282      return (T)o;
283   }
284
285   /*
286    * For parsing output from HtmlDocSerializer, this skips over the head, title, and links.
287    */
288   private HtmlTag skipToData(XmlReader r) throws ParseException, XMLStreamException {
289      while (true) {
290         int event = r.next();
291         if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) {
292            r.nextTag();
293            event = r.getEventType();
294            boolean isEmpty = (event == END_ELEMENT);
295            // Skip until we find a start element, end document, or non-empty text.
296            if (! isEmpty)
297               event = skipWs(r);
298            if (event == END_DOCUMENT)
299               throw new ParseException(this, "Unexpected end of stream looking for data.");
300            return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
301         }
302      }
303   }
304
305   private static String getAttribute(XmlReader r, String name, String def) {
306      for (int i = 0; i < r.getAttributeCount(); i++)
307         if (r.getAttributeLocalName(i).equals(name))
308            return r.getAttributeValue(i);
309      return def;
310   }
311
312   /*
313    * Reads an anchor tag and converts it into a bean.
314    */
315   private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType)
316         throws IOException, ParseException, XMLStreamException {
317      String href = r.getAttributeValue(null, "href");
318      String name = getElementText(r);
319      if (beanType.hasAnnotation(HtmlLink.class)) {
320         String uriProperty = "", nameProperty = "";
321         for (HtmlLink a : beanType.getAnnotations(HtmlLink.class)) {
322            if (! a.uriProperty().isEmpty())
323               uriProperty = a.uriProperty();
324            if (! a.nameProperty().isEmpty())
325               nameProperty = a.nameProperty();
326         }
327         BeanMap<T> m = newBeanMap(beanType.getInnerClass());
328         m.put(uriProperty, href);
329         m.put(nameProperty, name);
330         return m.getBean();
331      }
332      return convertToType(href, beanType);
333   }
334
335   private static Map<String,String> getAttributes(XmlReader r) {
336      Map<String,String> m = new TreeMap<>() ;
337      for (int i = 0; i < r.getAttributeCount(); i++)
338         m.put(r.getAttributeLocalName(i), r.getAttributeValue(i));
339      return m;
340   }
341
342   /*
343    * Reads contents of <table> element.
344    * Precondition:  Must be pointing at <table> event.
345    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
346    */
347   private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType,
348         ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
349      while (true) {
350         HtmlTag tag = nextTag(r, TR, xTABLE);
351         if (tag == xTABLE)
352            break;
353         tag = nextTag(r, TD, TH);
354         // Skip over the column headers.
355         if (tag == TH) {
356            skipTag(r);
357            r.nextTag();
358            skipTag(r);
359         } else {
360            K key = parseAnything(keyType, r, m, false, pMeta);
361            nextTag(r, TD);
362            V value = parseAnything(valueType, r, m, false, pMeta);
363            setName(valueType, value, key);
364            m.put(key, value);
365         }
366         tag = nextTag(r, xTD, xTR);
367         if (tag == xTD)
368            nextTag(r, xTR);
369      }
370
371      return m;
372   }
373
374   /*
375    * Reads contents of <ul> element.
376    * Precondition:  Must be pointing at event following <ul> event.
377    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
378    */
379   private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l,
380         ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
381      int argIndex = 0;
382      while (true) {
383         HtmlTag tag = nextTag(r, LI, xUL, xLI);
384         if (tag == xLI)
385            tag = nextTag(r, LI, xUL, xLI);
386         if (tag == xUL)
387            break;
388         ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
389         l.add((E)parseAnything(elementType, r, l, false, pMeta));
390      }
391      return l;
392   }
393
394   /*
395    * Reads contents of <ul> element.
396    * Precondition:  Must be pointing at event following <ul> event.
397    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
398    */
399   private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l,
400         ClassMeta<E> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
401
402      HtmlTag tag = nextTag(r, TR);
403      List<String> keys = new ArrayList<>();
404      while (true) {
405         tag = nextTag(r, TH, xTR);
406         if (tag == xTR)
407            break;
408         keys.add(getElementText(r));
409      }
410
411      int argIndex = 0;
412
413      while (true) {
414         r.nextTag();
415         tag = HtmlTag.forEvent(this, r);
416         if (tag == xTABLE)
417            break;
418
419         ClassMeta elementType = null;
420         String beanType = getAttribute(r, getBeanTypePropertyName(type), null);
421         if (beanType != null)
422            elementType = getClassMeta(beanType, pMeta, null);
423         if (elementType == null)
424            elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
425         if (elementType == null)
426            elementType = object();
427
428         BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this);
429
430         if (builder != null || elementType.canCreateNewBean(l)) {
431            BeanMap m =
432               builder != null
433               ? toBeanMap(builder.create(this, elementType))
434               : newBeanMap(l, elementType.getInnerClass())
435            ;
436            for (int i = 0; i < keys.size(); i++) {
437               tag = nextTag(r, xTD, TD, NULL);
438               if (tag == xTD)
439                  tag = nextTag(r, TD, NULL);
440               if (tag == NULL) {
441                  m = null;
442                  nextTag(r, xNULL);
443                  break;
444               }
445               String key = keys.get(i);
446               BeanMapEntry e = m.getProperty(key);
447               if (e == null) {
448                  //onUnknownProperty(key, m, -1, -1);
449                  parseAnything(object(), r, l, false, null);
450               } else {
451                  BeanPropertyMeta bpm = e.getMeta();
452                  ClassMeta<?> cm = bpm.getClassMeta();
453                  Object value = parseAnything(cm, r, m.getBean(false), false, bpm);
454                  setName(cm, value, key);
455                  bpm.set(m, key, value);
456               }
457            }
458            l.add(
459               m == null
460               ? null
461               : builder != null
462                  ? builder.build(this, m.getBean(), elementType)
463                  : (E)m.getBean()
464            );
465         } else {
466            String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType()));
467            Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l)
468               : new OMap(this));
469            for (int i = 0; i < keys.size(); i++) {
470               tag = nextTag(r, TD, NULL);
471               if (tag == NULL) {
472                  m = null;
473                  nextTag(r, xNULL);
474                  break;
475               }
476               String key = keys.get(i);
477               if (m != null) {
478                  ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType();
479                  Object value = parseAnything(vt, r, l, false, pMeta);
480                  setName(vt, value, key);
481                  m.put(convertToType(key, kt), value);
482               }
483            }
484            if (m != null && c != null) {
485               OMap m2 = (m instanceof OMap ? (OMap)m : new OMap(m).session(this));
486               m2.put(getBeanTypePropertyName(type.getElementType()), c);
487               l.add((E)cast(m2, pMeta, elementType));
488            } else {
489               if (m instanceof OMap)
490                  l.add((E)convertToType(m, elementType));
491               else
492                  l.add((E)m);
493            }
494         }
495         nextTag(r, xTR);
496      }
497      return l;
498   }
499
500   /*
501    * Reads contents of <table> element.
502    * Precondition:  Must be pointing at event following <table> event.
503    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
504    */
505   private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws IOException, ParseException, ExecutableException, XMLStreamException {
506      while (true) {
507         HtmlTag tag = nextTag(r, TR, xTABLE);
508         if (tag == xTABLE)
509            break;
510         tag = nextTag(r, TD, TH);
511         // Skip over the column headers.
512         if (tag == TH) {
513            skipTag(r);
514            r.nextTag();
515            skipTag(r);
516         } else {
517            String key = getElementText(r);
518            nextTag(r, TD);
519            BeanPropertyMeta pMeta = m.getPropertyMeta(key);
520            if (pMeta == null) {
521               onUnknownProperty(key, m, parseAnything(object(), r, null, false, null));
522            } else {
523               ClassMeta<?> cm = pMeta.getClassMeta();
524               Object value = parseAnything(cm, r, m.getBean(false), false, pMeta);
525               setName(cm, value, key);
526               try {
527                  pMeta.set(m, key, value);
528               } catch (BeanRuntimeException e) {
529                  onBeanSetterException(pMeta, e);
530                  throw e;
531               }
532            }
533         }
534         HtmlTag t = nextTag(r, xTD, xTR);
535         if (t == xTD)
536            nextTag(r, xTR);
537      }
538      return m;
539   }
540
541   /*
542    * Reads the next tag.  Advances past anything that's not a start or end tag.  Throws an exception if
543    *    it's not one of the expected tags.
544    * Precondition:  Must be pointing before the event we want to parse.
545    * Postcondition:  Pointing at the tag just parsed.
546    */
547   private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
548      int et = r.next();
549
550      while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT)
551         et = r.next();
552
553      if (et == END_DOCUMENT)
554         throw new ParseException(this, "Unexpected end of document.");
555
556      HtmlTag tag = HtmlTag.forEvent(this, r);
557      if (expected.length == 0)
558         return tag;
559      for (HtmlTag t : expected)
560         if (t == tag)
561            return tag;
562
563      throw new ParseException(this, "Unexpected tag: ''{0}''.  Expected one of the following: {1}", tag, expected);
564   }
565
566   /*
567    * Skips over the current element and advances to the next element.
568    * <p>
569    * Precondition:  Pointing to opening tag.
570    * Postcondition:  Pointing to next opening tag.
571    *
572    * @param r The stream being read from.
573    * @throws XMLStreamException
574    */
575   private void skipTag(XmlReader r) throws ParseException, XMLStreamException {
576      int et = r.getEventType();
577
578      if (et != START_ELEMENT)
579         throw new ParseException(this,
580            "skipToNextTag() call on invalid event ''{0}''.  Must only be called on START_ELEMENT events.",
581            XmlUtils.toReadableEvent(r)
582         );
583
584      String n = r.getLocalName();
585
586      int depth = 0;
587      while (true) {
588         et = r.next();
589         if (et == START_ELEMENT) {
590            String n2 = r.getLocalName();
591               if (n.equals(n2))
592            depth++;
593         } else if (et == END_ELEMENT) {
594            String n2 = r.getLocalName();
595            if (n.equals(n2))
596               depth--;
597            if (depth < 0)
598               return;
599         }
600      }
601   }
602
603   private void skipTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
604      HtmlTag tag = HtmlTag.forEvent(this, r);
605      if (tag.isOneOf(expected))
606         r.next();
607      else
608         throw new ParseException(this,
609            "Unexpected tag: ''{0}''.  Expected one of the following: {1}",
610            tag, expected);
611   }
612
613   private static int skipWs(XmlReader r)  throws XMLStreamException {
614      int event = r.getEventType();
615      while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace())
616         event = r.next();
617      return event;
618   }
619
620   /**
621    * Parses CHARACTERS data.
622    *
623    * <p>
624    * Precondition:  Pointing to event immediately following opening tag.
625    * Postcondition:  Pointing to closing tag.
626    *
627    * @param r The stream being read from.
628    * @return The parsed string.
629    * @throws XMLStreamException Thrown by underlying XML stream.
630    */
631   @Override /* XmlParserSession */
632   protected final String parseText(XmlReader r) throws IOException, ParseException, XMLStreamException {
633
634      StringBuilder sb = getStringBuilder();
635
636      int et = r.getEventType();
637      if (et == END_ELEMENT)
638         return "";
639
640      int depth = 0;
641
642      String characters = null;
643
644      while (true) {
645         if (et == START_ELEMENT) {
646            if (characters != null) {
647               if (sb.length() == 0)
648                  characters = trimStart(characters);
649               sb.append(characters);
650               characters = null;
651            }
652            HtmlTag tag = HtmlTag.forEvent(this, r);
653            if (tag == BR) {
654               sb.append('\n');
655               r.nextTag();
656            } else if (tag == BS) {
657               sb.append('\b');
658               r.nextTag();
659            } else if (tag == SP) {
660               et = r.next();
661               if (et == CHARACTERS) {
662                  String s = r.getText();
663                  if (s.length() > 0) {
664                     char c = r.getText().charAt(0);
665                     if (c == '\u2003')
666                        c = '\t';
667                     sb.append(c);
668                  }
669                  r.nextTag();
670               }
671            } else if (tag == FF) {
672               sb.append('\f');
673               r.nextTag();
674            } else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) {
675               et = r.next();
676               if (et == CHARACTERS) {
677                  sb.append(r.getText());
678                  r.nextTag();
679               }
680            } else {
681               sb.append('<').append(r.getLocalName());
682               for (int i = 0; i < r.getAttributeCount(); i++)
683                  sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\'');
684               sb.append('>');
685               depth++;
686            }
687         } else if (et == END_ELEMENT) {
688            if (characters != null) {
689               if (sb.length() == 0)
690                  characters = trimStart(characters);
691               if (depth == 0)
692                  characters = trimEnd(characters);
693               sb.append(characters);
694               characters = null;
695            }
696            if (depth == 0)
697               break;
698            sb.append('<').append(r.getLocalName()).append('>');
699            depth--;
700         } else if (et == CHARACTERS) {
701            characters = r.getText();
702         }
703         et = r.next();
704      }
705
706      String s = trim(sb.toString());
707      returnStringBuilder(sb);
708      return s;
709   }
710
711   /**
712    * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag.
713    *
714    * <p>
715    * Precondition:  Pointing to opening tag.
716    * Postcondition:  Pointing to closing tag.
717    *
718    * @param r The stream being read from.
719    * @return The parsed string.
720    * @throws XMLStreamException Thrown by underlying XML stream.
721    * @throws ParseException Malformed input encountered.
722    */
723   @Override /* XmlParserSession */
724   protected final String getElementText(XmlReader r) throws IOException, XMLStreamException, ParseException {
725      r.next();
726      return parseText(r);
727   }
728
729   @Override /* XmlParserSession */
730   protected final boolean isWhitespaceElement(XmlReader r) {
731      String s = r.getLocalName();
732      return whitespaceElements.contains(s);
733   }
734
735   @Override /* XmlParserSession */
736   protected final String parseWhitespaceElement(XmlReader r) throws IOException, ParseException, XMLStreamException {
737
738      HtmlTag tag = HtmlTag.forEvent(this, r);
739      int et = r.next();
740      if (tag == BR) {
741         return "\n";
742      } else if (tag == BS) {
743         return "\b";
744      } else if (tag == FF) {
745         return "\f";
746      } else if (tag == SP) {
747         if (et == CHARACTERS) {
748            String s = r.getText();
749            if (s.charAt(0) == '\u2003')
750               s = "\t";
751            r.next();
752            return decodeString(s);
753         }
754         return "";
755      } else {
756         throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag);
757      }
758   }
759
760   //-----------------------------------------------------------------------------------------------------------------
761   // Extended metadata
762   //-----------------------------------------------------------------------------------------------------------------
763
764   /**
765    * Returns the language-specific metadata on the specified class.
766    *
767    * @param cm The class to return the metadata on.
768    * @return The metadata.
769    */
770   protected HtmlClassMeta getHtmlClassMeta(ClassMeta<?> cm) {
771      return ctx.getHtmlClassMeta(cm);
772   }
773
774   /**
775    * Returns the language-specific metadata on the specified bean property.
776    *
777    * @param bpm The bean property to return the metadata on.
778    * @return The metadata.
779    */
780   protected HtmlBeanPropertyMeta getHtmlBeanPropertyMeta(BeanPropertyMeta bpm) {
781      return ctx.getHtmlBeanPropertyMeta(bpm);
782   }
783
784   //-----------------------------------------------------------------------------------------------------------------
785   // Other methods
786   //-----------------------------------------------------------------------------------------------------------------
787
788   @Override /* Session */
789   public OMap toMap() {
790      return super.toMap()
791         .a("HtmlParserSession", new DefaultFilteringOMap()
792         );
793   }
794}