001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.html;
018
019import static javax.xml.stream.XMLStreamConstants.*;
020import static org.apache.juneau.common.utils.StringUtils.*;
021import static org.apache.juneau.common.utils.Utils.*;
022import static org.apache.juneau.html.HtmlTag.*;
023
024import java.io.*;
025import java.lang.reflect.*;
026import java.nio.charset.*;
027import java.util.*;
028import java.util.function.*;
029
030import javax.xml.stream.*;
031
032import org.apache.juneau.*;
033import org.apache.juneau.collections.*;
034import org.apache.juneau.common.utils.*;
035import org.apache.juneau.html.annotation.*;
036import org.apache.juneau.httppart.*;
037import org.apache.juneau.internal.*;
038import org.apache.juneau.parser.*;
039import org.apache.juneau.swap.*;
040import org.apache.juneau.xml.*;
041
042/**
043 * ContextSession object that lives for the duration of a single use of {@link HtmlParser}.
044 *
045 * <h5 class='section'>Notes:</h5><ul>
046 *    <li class='warn'>This class is not thread safe and is typically discarded after one use.
047 * </ul>
048 *
049 * <h5 class='section'>See Also:</h5><ul>
050 *    <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/HtmlBasics">HTML Basics</a>
051
052 * </ul>
053 */
054@SuppressWarnings({ "unchecked", "rawtypes" })
055public class HtmlParserSession extends XmlParserSession {
056
057   //-------------------------------------------------------------------------------------------------------------------
058   // Static
059   //-------------------------------------------------------------------------------------------------------------------
060
   // Local names of the elements that isWhitespaceElement() reports as whitespace markers.
   private static final Set<String> whitespaceElements = set("br","bs","sp","ff");
062
063   /**
064    * Creates a new builder for this object.
065    *
066    * @param ctx The context creating this session.
067    * @return A new builder.
068    */
069   public static Builder create(HtmlParser ctx) {
070      return new Builder(ctx);
071   }
072
073   //-------------------------------------------------------------------------------------------------------------------
074   // Builder
075   //-------------------------------------------------------------------------------------------------------------------
076
077   /**
078    * Builder class.
079    */
080   public static class Builder extends XmlParserSession.Builder {
081
082      HtmlParser ctx;
083
084      /**
085       * Constructor
086       *
087       * @param ctx The context creating this session.
088       */
089      protected Builder(HtmlParser ctx) {
090         super(ctx);
091         this.ctx = ctx;
092      }
093
094      @Override
095      public HtmlParserSession build() {
096         return new HtmlParserSession(this);
097      }
098      @Override /* Overridden from Builder */
099      public <T> Builder apply(Class<T> type, Consumer<T> apply) {
100         super.apply(type, apply);
101         return this;
102      }
103
104      @Override /* Overridden from Builder */
105      public Builder debug(Boolean value) {
106         super.debug(value);
107         return this;
108      }
109
110      @Override /* Overridden from Builder */
111      public Builder properties(Map<String,Object> value) {
112         super.properties(value);
113         return this;
114      }
115
116      @Override /* Overridden from Builder */
117      public Builder property(String key, Object value) {
118         super.property(key, value);
119         return this;
120      }
121
122      @Override /* Overridden from Builder */
123      public Builder unmodifiable() {
124         super.unmodifiable();
125         return this;
126      }
127
128      @Override /* Overridden from Builder */
129      public Builder locale(Locale value) {
130         super.locale(value);
131         return this;
132      }
133
134      @Override /* Overridden from Builder */
135      public Builder localeDefault(Locale value) {
136         super.localeDefault(value);
137         return this;
138      }
139
140      @Override /* Overridden from Builder */
141      public Builder mediaType(MediaType value) {
142         super.mediaType(value);
143         return this;
144      }
145
146      @Override /* Overridden from Builder */
147      public Builder mediaTypeDefault(MediaType value) {
148         super.mediaTypeDefault(value);
149         return this;
150      }
151
152      @Override /* Overridden from Builder */
153      public Builder timeZone(TimeZone value) {
154         super.timeZone(value);
155         return this;
156      }
157
158      @Override /* Overridden from Builder */
159      public Builder timeZoneDefault(TimeZone value) {
160         super.timeZoneDefault(value);
161         return this;
162      }
163
164      @Override /* Overridden from Builder */
165      public Builder javaMethod(Method value) {
166         super.javaMethod(value);
167         return this;
168      }
169
170      @Override /* Overridden from Builder */
171      public Builder outer(Object value) {
172         super.outer(value);
173         return this;
174      }
175
176      @Override /* Overridden from Builder */
177      public Builder schema(HttpPartSchema value) {
178         super.schema(value);
179         return this;
180      }
181
182      @Override /* Overridden from Builder */
183      public Builder schemaDefault(HttpPartSchema value) {
184         super.schemaDefault(value);
185         return this;
186      }
187
188      @Override /* Overridden from Builder */
189      public Builder fileCharset(Charset value) {
190         super.fileCharset(value);
191         return this;
192      }
193
194      @Override /* Overridden from Builder */
195      public Builder streamCharset(Charset value) {
196         super.streamCharset(value);
197         return this;
198      }
199   }
200
201   //-------------------------------------------------------------------------------------------------------------------
202   // Instance
203   //-------------------------------------------------------------------------------------------------------------------
204
205   private final HtmlParser ctx;
206
207   /**
208    * Constructor.
209    *
210    * @param builder The builder for this object.
211    */
212   protected HtmlParserSession(Builder builder) {
213      super(builder);
214      ctx = builder.ctx;
215   }
216
217   @Override /* ParserSession */
218   protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException {
219      try {
220         return parseAnything(type, getXmlReader(pipe), getOuter(), true, null);
221      } catch (XMLStreamException e) {
222         throw new ParseException(e);
223      }
224   }
225
226   @Override /* ReaderParserSession */
227   protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType)
228         throws Exception {
229      return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType),
230         (ClassMeta<V>)getClassMeta(valueType), null);
231   }
232
233   @Override /* ReaderParserSession */
234   protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType)
235         throws Exception {
236      return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null);
237   }
238
239   /*
240    * Reads anything starting at the current event.
241    * <p>
242    * Precondition:  Must be pointing at outer START_ELEMENT.
243    * Postcondition:  Pointing at outer END_ELEMENT.
244    */
245   private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
246
247      if (eType == null)
248         eType = (ClassMeta<T>)object();
249      ObjectSwap<T,Object> swap = (ObjectSwap<T,Object>)eType.getSwap(this);
250      BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this);
251      ClassMeta<?> sType = null;
252      if (builder != null)
253         sType = builder.getBuilderClassMeta(this);
254      else if (swap != null)
255         sType = swap.getSwapClassMeta(this);
256      else
257         sType = eType;
258
259      if (sType.isOptional())
260         return (T)Utils.opt(parseAnything(eType.getElementType(), r, outer, isRoot, pMeta));
261
262      setCurrentClass(sType);
263
264      int event = r.getEventType();
265      if (event != START_ELEMENT)
266         throw new ParseException(this, "parseAnything must be called on outer start element.");
267
268      if (! isRoot)
269         event = r.next();
270      boolean isEmpty = (event == END_ELEMENT);
271
272      // Skip until we find a start element, end document, or non-empty text.
273      if (! isEmpty)
274         event = skipWs(r);
275
276      if (event == END_DOCUMENT)
277         throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType);
278
279      // Handle @Html(asXml=true) beans.
280      HtmlClassMeta hcm = getHtmlClassMeta(sType);
281      if (hcm.getFormat() == HtmlFormat.XML)
282         return super.parseAnything(eType, null, r, outer, false, pMeta);
283
284      Object o = null;
285
286      boolean isValid = true;
287      HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
288
289      // If it's not a known tag, then parse it as XML.
290      // Allows us to parse stuff like "<div/>" into HTML5 beans.
291      if (tag == null && event != CHARACTERS)
292         return super.parseAnything(eType, null, r, outer, false, pMeta);
293
294      if (tag == HTML)
295         tag = skipToData(r);
296
297      if (isEmpty) {
298         o = "";
299      } else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) {
300         String text = parseText(r);
301         if (sType.isObject() || sType.isCharSequence())
302            o = text;
303         else if (sType.isChar())
304            o = parseCharacter(text);
305         else if (sType.isBoolean())
306            o = Boolean.parseBoolean(text);
307         else if (sType.isNumber())
308            o = parseNumber(text, (Class<? extends Number>)eType.getInnerClass());
309         else if (sType.canCreateNewInstanceFromString(outer))
310            o = sType.newInstanceFromString(outer, text);
311         else
312            isValid = false;
313
314      } else if (tag == STRING || (tag == A && pMeta != null && getHtmlBeanPropertyMeta(pMeta).getLink() != null)) {
315         String text = getElementText(r);
316         if (sType.isObject() || sType.isCharSequence())
317            o = text;
318         else if (sType.isChar())
319            o = parseCharacter(text);
320         else if (sType.canCreateNewInstanceFromString(outer))
321            o = sType.newInstanceFromString(outer, text);
322         else
323            isValid = false;
324         skipTag(r, tag == STRING ? xSTRING : xA);
325
326      } else if (tag == NUMBER) {
327         String text = getElementText(r);
328         if (sType.isObject())
329            o = parseNumber(text, Number.class);
330         else if (sType.isNumber())
331            o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass());
332         else
333            isValid = false;
334         skipTag(r, xNUMBER);
335
336      } else if (tag == BOOLEAN) {
337         String text = getElementText(r);
338         if (sType.isObject() || sType.isBoolean())
339            o = Boolean.parseBoolean(text);
340         else
341            isValid = false;
342         skipTag(r, xBOOLEAN);
343
344      } else if (tag == P) {
345         String text = getElementText(r);
346         if (! "No Results".equals(text))
347            isValid = false;
348         skipTag(r, xP);
349
350      } else if (tag == NULL) {
351         skipTag(r, NULL);
352         skipTag(r, xNULL);
353
354      } else if (tag == A) {
355         o = parseAnchor(r, swap == null ? eType : null);
356         skipTag(r, xA);
357
358      } else if (tag == TABLE) {
359
360         String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object");
361         ClassMeta cm = getClassMeta(typeName, pMeta, eType);
362
363         if (cm != null) {
364            sType = eType = cm;
365            typeName = sType.isCollectionOrArray() ? "array" : "object";
366         } else if (! "array".equals(typeName)) {
367            // Type name could be a subtype name.
368            typeName = sType.isCollectionOrArray() ? "array" : "object";
369         }
370
371         if (typeName.equals("object")) {
372            if (sType.isObject()) {
373               o = parseIntoMap(r, newGenericMap(sType), sType.getKeyType(), sType.getValueType(),
374                  pMeta);
375            } else if (sType.isMap()) {
376               o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer)
377                  : newGenericMap(sType)), sType.getKeyType(), sType.getValueType(), pMeta);
378            } else if (builder != null) {
379               BeanMap m = toBeanMap(builder.create(this, eType));
380               o = builder.build(this, parseIntoBean(r, m).getBean(), eType);
381            } else if (sType.canCreateNewBean(outer)) {
382               BeanMap m = newBeanMap(outer, sType.getInnerClass());
383               o = parseIntoBean(r, m).getBean();
384            } else if (sType.getProxyInvocationHandler() != null) {
385               BeanMap m = newBeanMap(outer, sType.getInnerClass());
386               o = parseIntoBean(r, m).getBean();
387            } else {
388               isValid = false;
389            }
390            skipTag(r, xTABLE);
391
392         } else if (typeName.equals("array")) {
393            if (sType.isObject())
394               o = parseTableIntoCollection(r, (Collection)new JsonList(this), sType, pMeta);
395            else if (sType.isCollection())
396               o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
397                  ? sType.newInstance(outer) : new JsonList(this)), sType, pMeta);
398            else if (sType.isArray() || sType.isArgs()) {
399               ArrayList l = (ArrayList)parseTableIntoCollection(r, list(), sType, pMeta);
400               o = toArray(sType, l);
401            }
402            else
403               isValid = false;
404            skipTag(r, xTABLE);
405
406         } else {
407            isValid = false;
408         }
409
410      } else if (tag == UL) {
411         String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array");
412         ClassMeta cm = getClassMeta(typeName, pMeta, eType);
413         if (cm != null)
414            sType = eType = cm;
415
416         if (sType.isObject())
417            o = parseIntoCollection(r, new JsonList(this), sType, pMeta);
418         else if (sType.isCollection() || sType.isObject())
419            o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer)
420               ? sType.newInstance(outer) : new JsonList(this)), sType, pMeta);
421         else if (sType.isArray() || sType.isArgs())
422            o = toArray(sType, parseIntoCollection(r, list(), sType, pMeta));
423         else
424            isValid = false;
425         skipTag(r, xUL);
426
427      }
428
429      if (! isValid)
430         throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType);
431
432      if (swap != null && o != null)
433         o = unswap(swap, o, eType);
434
435      if (outer != null)
436         setParent(eType, o, outer);
437
438      skipWs(r);
439      return (T)o;
440   }
441
442   /*
443    * For parsing output from HtmlDocSerializer, this skips over the head, title, and links.
444    */
445   private HtmlTag skipToData(XmlReader r) throws ParseException, XMLStreamException {
446      while (true) {
447         int event = r.next();
448         if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) {
449            r.nextTag();
450            event = r.getEventType();
451            boolean isEmpty = (event == END_ELEMENT);
452            // Skip until we find a start element, end document, or non-empty text.
453            if (! isEmpty)
454               event = skipWs(r);
455            if (event == END_DOCUMENT)
456               throw new ParseException(this, "Unexpected end of stream looking for data.");
457            return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
458         }
459      }
460   }
461
462   private static String getAttribute(XmlReader r, String name, String def) {
463      for (int i = 0; i < r.getAttributeCount(); i++)
464         if (r.getAttributeLocalName(i).equals(name))
465            return r.getAttributeValue(i);
466      return def;
467   }
468
469   /*
470    * Reads an anchor tag and converts it into a bean.
471    */
472   private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType)
473         throws IOException, ParseException, XMLStreamException {
474      String href = r.getAttributeValue(null, "href");
475      String name = getElementText(r);
476      if (beanType != null && beanType.hasAnnotation(HtmlLink.class)) {
477         Value<String> uriProperty = Value.empty(), nameProperty = Value.empty();
478         beanType.forEachAnnotation(HtmlLink.class, x -> isNotEmpty(x.uriProperty()), x -> uriProperty.set(x.uriProperty()));
479         beanType.forEachAnnotation(HtmlLink.class, x -> isNotEmpty(x.nameProperty()), x -> nameProperty.set(x.nameProperty()));
480         BeanMap<T> m = newBeanMap(beanType.getInnerClass());
481         m.put(uriProperty.orElse(""), href);
482         m.put(nameProperty.orElse(""), name);
483         return m.getBean();
484      }
485      return convertToType(href, beanType);
486   }
487
488   private static Map<String,String> getAttributes(XmlReader r) {
489      Map<String,String> m = new TreeMap<>() ;
490      for (int i = 0; i < r.getAttributeCount(); i++)
491         m.put(r.getAttributeLocalName(i), r.getAttributeValue(i));
492      return m;
493   }
494
495   /*
496    * Reads contents of <table> element.
497    * Precondition:  Must be pointing at <table> event.
498    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
499    */
500   private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType,
501         ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
502      while (true) {
503         HtmlTag tag = nextTag(r, TR, xTABLE);
504         if (tag == xTABLE)
505            break;
506         tag = nextTag(r, TD, TH);
507         // Skip over the column headers.
508         if (tag == TH) {
509            skipTag(r);
510            r.nextTag();
511            skipTag(r);
512         } else {
513            K key = parseAnything(keyType, r, m, false, pMeta);
514            nextTag(r, TD);
515            V value = parseAnything(valueType, r, m, false, pMeta);
516            setName(valueType, value, key);
517            m.put(key, value);
518         }
519         tag = nextTag(r, xTD, xTR);
520         if (tag == xTD)
521            nextTag(r, xTR);
522      }
523
524      return m;
525   }
526
527   /*
528    * Reads contents of <ul> element.
529    * Precondition:  Must be pointing at event following <ul> event.
530    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
531    */
532   private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l,
533         ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
534      int argIndex = 0;
535      while (true) {
536         HtmlTag tag = nextTag(r, LI, xUL, xLI);
537         if (tag == xLI)
538            tag = nextTag(r, LI, xUL, xLI);
539         if (tag == xUL)
540            break;
541         ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
542         l.add((E)parseAnything(elementType, r, l, false, pMeta));
543      }
544      return l;
545   }
546
   /*
    * Reads the rows of a <table> element into the supplied collection, one element per data row.
    *
    * The first row must be a <tr> of <th> cells; their text becomes the list of column keys.
    * Every following row is turned into either a bean or a map whose entries are the row's cells
    * matched positionally against those keys.  The loop ends when </table> is reached.
    *
    * Precondition:  The first nextTag(...) call must land on the header <tr>.
    * Postcondition:  Pointing at the </table> event.
    */
   private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l,
         ClassMeta<E> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {

      // Header row:  collect the text of each <th> until the closing </tr>.
      HtmlTag tag = nextTag(r, TR);
      List<String> keys = list();
      while (true) {
         tag = nextTag(r, TH, xTR);
         if (tag == xTR)
            break;
         keys.add(getElementText(r));
      }

      // Index of the next positional argument type; only advanced when 'type' is an args type.
      int argIndex = 0;

      while (true) {
         // Move to the next row's opening tag, or to </table> when there are no more rows.
         r.nextTag();
         tag = HtmlTag.forEvent(this, r);
         if (tag == xTABLE)
            break;

         // Element type resolution order:  type attribute on the row, then the declared arg/element type,
         // then plain Object.
         ClassMeta elementType = null;
         String beanType = getAttribute(r, getBeanTypePropertyName(type), null);
         if (beanType != null)
            elementType = getClassMeta(beanType, pMeta, null);
         if (elementType == null)
            elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
         if (elementType == null)
            elementType = object();

         BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this);

         if (builder != null || elementType.canCreateNewBean(l)) {
            // Bean row:  populate a bean map (of the builder object when a builder swap exists).
            BeanMap m =
               builder != null
               ? toBeanMap(builder.create(this, elementType))
               : newBeanMap(l, elementType.getInnerClass())
            ;
            for (String key : keys) {
               tag = nextTag(r, xTD, TD, NULL);
               if (tag == xTD)
                  tag = nextTag(r, TD, NULL);
               // A <null> where a cell is expected makes the whole row a null element.
               if (tag == NULL) {
                  m = null;
                  nextTag(r, xNULL);
                  break;
               }
               BeanMapEntry e = m.getProperty(key);
               if (e == null) {
                  // Unknown column:  the cell is still parsed so the reader advances, but the value is dropped.
                  //onUnknownProperty(key, m, -1, -1);
                  parseAnything(object(), r, l, false, null);
               } else {
                  BeanPropertyMeta bpm = e.getMeta();
                  ClassMeta<?> cm = bpm.getClassMeta();
                  Object value = parseAnything(cm, r, m.getBean(false), false, bpm);
                  setName(cm, value, key);
                  bpm.set(m, key, value);
               }
            }
            l.add(
               m == null
               ? null
               : builder != null
                  ? builder.build(this, m.getBean(), elementType)
                  : (E)m.getBean()
            );
         } else {
            // Map row.  'c' is the value of the row's type attribute, if any (looked up by the element
            // type's bean-type property name).
            String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType()));
            Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l)
               : newGenericMap(elementType));
            for (String key : keys) {
               tag = nextTag(r, TD, NULL);
               // A <null> where a cell is expected makes the whole row a null element.
               if (tag == NULL) {
                  m = null;
                  nextTag(r, xNULL);
                  break;
               }
               if (m != null) {
                  ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType();
                  Object value = parseAnything(vt, r, l, false, pMeta);
                  setName(vt, value, key);
                  m.put(convertToType(key, kt), value);
               }
            }
            if (m != null && c != null) {
               // Put the type name into the map and let cast(...) produce the element from it.
               JsonMap m2 = (m instanceof JsonMap ? (JsonMap)m : new JsonMap(m).session(this));
               m2.put(getBeanTypePropertyName(type.getElementType()), c);
               l.add((E)cast(m2, pMeta, elementType));
            } else {
               if (m instanceof JsonMap)
                  l.add((E)convertToType(m, elementType));
               else
                  l.add((E)m);
            }
         }
         nextTag(r, xTR);
      }
      return l;
   }
650
651   /*
652    * Reads contents of <table> element.
653    * Precondition:  Must be pointing at event following <table> event.
654    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
655    */
656   private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws IOException, ParseException, ExecutableException, XMLStreamException {
657      while (true) {
658         HtmlTag tag = nextTag(r, TR, xTABLE);
659         if (tag == xTABLE)
660            break;
661         tag = nextTag(r, TD, TH);
662         // Skip over the column headers.
663         if (tag == TH) {
664            skipTag(r);
665            r.nextTag();
666            skipTag(r);
667         } else {
668            String key = getElementText(r);
669            nextTag(r, TD);
670            BeanPropertyMeta pMeta = m.getPropertyMeta(key);
671            if (pMeta == null) {
672               onUnknownProperty(key, m, parseAnything(object(), r, null, false, null));
673            } else {
674               ClassMeta<?> cm = pMeta.getClassMeta();
675               Object value = parseAnything(cm, r, m.getBean(false), false, pMeta);
676               setName(cm, value, key);
677               try {
678                  pMeta.set(m, key, value);
679               } catch (BeanRuntimeException e) {
680                  onBeanSetterException(pMeta, e);
681                  throw e;
682               }
683            }
684         }
685         HtmlTag t = nextTag(r, xTD, xTR);
686         if (t == xTD)
687            nextTag(r, xTR);
688      }
689      return m;
690   }
691
692   /*
693    * Reads the next tag.  Advances past anything that's not a start or end tag.  Throws an exception if
694    *    it's not one of the expected tags.
695    * Precondition:  Must be pointing before the event we want to parse.
696    * Postcondition:  Pointing at the tag just parsed.
697    */
698   private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
699      int et = r.next();
700
701      while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT)
702         et = r.next();
703
704      if (et == END_DOCUMENT)
705         throw new ParseException(this, "Unexpected end of document.");
706
707      HtmlTag tag = HtmlTag.forEvent(this, r);
708      if (expected.length == 0)
709         return tag;
710      for (HtmlTag t : expected)
711         if (t == tag)
712            return tag;
713
714      throw new ParseException(this, "Unexpected tag: ''{0}''.  Expected one of the following: {1}", tag, expected);
715   }
716
717   /*
718    * Skips over the current element and advances to the next element.
719    * <p>
720    * Precondition:  Pointing to opening tag.
721    * Postcondition:  Pointing to next opening tag.
722    *
723    * @param r The stream being read from.
724    * @throws XMLStreamException
725    */
726   private void skipTag(XmlReader r) throws ParseException, XMLStreamException {
727      int et = r.getEventType();
728
729      if (et != START_ELEMENT)
730         throw new ParseException(this,
731            "skipToNextTag() call on invalid event ''{0}''.  Must only be called on START_ELEMENT events.",
732            XmlUtils.toReadableEvent(r)
733         );
734
735      String n = r.getLocalName();
736
737      int depth = 0;
738      while (true) {
739         et = r.next();
740         if (et == START_ELEMENT) {
741            String n2 = r.getLocalName();
742               if (n.equals(n2))
743                  depth++;
744         } else if (et == END_ELEMENT) {
745            String n2 = r.getLocalName();
746            if (n.equals(n2))
747               depth--;
748            if (depth < 0)
749               return;
750         }
751      }
752   }
753
754   private void skipTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
755      HtmlTag tag = HtmlTag.forEvent(this, r);
756      if (tag.isOneOf(expected))
757         r.next();
758      else
759         throw new ParseException(this,
760            "Unexpected tag: ''{0}''.  Expected one of the following: {1}",
761            tag, expected);
762   }
763
764   private static int skipWs(XmlReader r)  throws XMLStreamException {
765      int event = r.getEventType();
766      while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace())
767         event = r.next();
768      return event;
769   }
770
771   /**
772    * Parses CHARACTERS data.
773    *
774    * <p>
775    * Precondition:  Pointing to event immediately following opening tag.
776    * Postcondition:  Pointing to closing tag.
777    *
778    * @param r The stream being read from.
779    * @return The parsed string.
780    * @throws XMLStreamException Thrown by underlying XML stream.
781    */
782   @Override /* XmlParserSession */
783   protected String parseText(XmlReader r) throws IOException, ParseException, XMLStreamException {
784
785      StringBuilder sb = getStringBuilder();
786
787      int et = r.getEventType();
788      if (et == END_ELEMENT)
789         return "";
790
791      int depth = 0;
792
793      String characters = null;
794
795      while (true) {
796         if (et == START_ELEMENT) {
797            if (characters != null) {
798               if (sb.length() == 0)
799                  characters = trimStart(characters);
800               sb.append(characters);
801               characters = null;
802            }
803            HtmlTag tag = HtmlTag.forEvent(this, r);
804            if (tag == BR) {
805               sb.append('\n');
806               r.nextTag();
807            } else if (tag == BS) {
808               sb.append('\b');
809               r.nextTag();
810            } else if (tag == SP) {
811               et = r.next();
812               if (et == CHARACTERS) {
813                  String s = r.getText();
814                  if (isNotEmpty(s)) {
815                     char c = r.getText().charAt(0);
816                     if (c == '\u2003')
817                        c = '\t';
818                     sb.append(c);
819                  }
820                  r.nextTag();
821               }
822            } else if (tag == FF) {
823               sb.append('\f');
824               r.nextTag();
825            } else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) {
826               et = r.next();
827               if (et == CHARACTERS) {
828                  sb.append(r.getText());
829                  r.nextTag();
830               }
831            } else {
832               sb.append('<').append(r.getLocalName());
833               for (int i = 0; i < r.getAttributeCount(); i++)
834                  sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\'');
835               sb.append('>');
836               depth++;
837            }
838         } else if (et == END_ELEMENT) {
839            if (characters != null) {
840               if (sb.length() == 0)
841                  characters = trimStart(characters);
842               if (depth == 0)
843                  characters = trimEnd(characters);
844               sb.append(characters);
845               characters = null;
846            }
847            if (depth == 0)
848               break;
849            sb.append('<').append(r.getLocalName()).append('>');
850            depth--;
851         } else if (et == CHARACTERS) {
852            characters = r.getText();
853         }
854         et = r.next();
855      }
856
857      String s = trim(sb.toString());
858      returnStringBuilder(sb);
859      return s;
860   }
861
862   /**
863    * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag.
864    *
865    * <p>
866    * Precondition:  Pointing to opening tag.
867    * Postcondition:  Pointing to closing tag.
868    *
869    * @param r The stream being read from.
870    * @return The parsed string.
871    * @throws XMLStreamException Thrown by underlying XML stream.
872    * @throws ParseException Malformed input encountered.
873    */
874   @Override /* XmlParserSession */
875   protected String getElementText(XmlReader r) throws IOException, XMLStreamException, ParseException {
876      r.next();
877      return parseText(r);
878   }
879
880   @Override /* XmlParserSession */
881   protected boolean isWhitespaceElement(XmlReader r) {
882      String s = r.getLocalName();
883      return whitespaceElements.contains(s);
884   }
885
886   @Override /* XmlParserSession */
887   protected String parseWhitespaceElement(XmlReader r) throws IOException, ParseException, XMLStreamException {
888
889      HtmlTag tag = HtmlTag.forEvent(this, r);
890      int et = r.next();
891      if (tag == BR) {
892         return "\n";
893      } else if (tag == BS) {
894         return "\b";
895      } else if (tag == FF) {
896         return "\f";
897      } else if (tag == SP) {
898         if (et == CHARACTERS) {
899            String s = r.getText();
900            if (s.charAt(0) == '\u2003')
901               s = "\t";
902            r.next();
903            return decodeString(s);
904         }
905         return "";
906      } else {
907         throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag);
908      }
909   }
910
911   //-----------------------------------------------------------------------------------------------------------------
912   // Extended metadata
913   //-----------------------------------------------------------------------------------------------------------------
914
915   /**
916    * Returns the language-specific metadata on the specified class.
917    *
918    * @param cm The class to return the metadata on.
919    * @return The metadata.
920    */
921   protected HtmlClassMeta getHtmlClassMeta(ClassMeta<?> cm) {
922      return ctx.getHtmlClassMeta(cm);
923   }
924
925   /**
926    * Returns the language-specific metadata on the specified bean property.
927    *
928    * @param bpm The bean property to return the metadata on.
929    * @return The metadata.
930    */
931   protected HtmlBeanPropertyMeta getHtmlBeanPropertyMeta(BeanPropertyMeta bpm) {
932      return ctx.getHtmlBeanPropertyMeta(bpm);
933   }
934}