001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.html;
018
019import static javax.xml.stream.XMLStreamConstants.*;
020import static org.apache.juneau.commons.utils.AssertionUtils.*;
021import static org.apache.juneau.commons.utils.CollectionUtils.*;
022import static org.apache.juneau.commons.utils.StringUtils.*;
023import static org.apache.juneau.commons.utils.Utils.*;
024import static org.apache.juneau.html.HtmlTag.*;
025
026import java.io.*;
027import java.lang.reflect.*;
028import java.nio.charset.*;
029import java.util.*;
030import java.util.function.*;
031
032import javax.xml.stream.*;
033
034import org.apache.juneau.*;
035import org.apache.juneau.collections.*;
036import org.apache.juneau.commons.lang.*;
037import org.apache.juneau.commons.reflect.*;
038import org.apache.juneau.html.annotation.*;
039import org.apache.juneau.httppart.*;
040import org.apache.juneau.parser.*;
041import org.apache.juneau.swap.*;
042import org.apache.juneau.xml.*;
043
044/**
045 * ContextSession object that lives for the duration of a single use of {@link HtmlParser}.
046 *
047 * <h5 class='section'>Notes:</h5><ul>
048 *    <li class='warn'>This class is not thread safe and is typically discarded after one use.
049 * </ul>
050 *
051 * <h5 class='section'>See Also:</h5><ul>
052 *    <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/HtmlBasics">HTML Basics</a>
053
054 * </ul>
055 */
056@SuppressWarnings({ "unchecked", "rawtypes" })
057public class HtmlParserSession extends XmlParserSession {
058   /**
059    * Builder class.
060    */
061   public static class Builder extends XmlParserSession.Builder {
062
063      private HtmlParser ctx;
064
065      /**
066       * Constructor
067       *
068       * @param ctx The context creating this session.
069       *    <br>Cannot be <jk>null</jk>.
070       */
071      protected Builder(HtmlParser ctx) {
072         super(assertArgNotNull("ctx", ctx));
073         this.ctx = ctx;
074      }
075
076      @Override /* Overridden from Builder */
077      public <T> Builder apply(Class<T> type, Consumer<T> apply) {
078         super.apply(type, apply);
079         return this;
080      }
081
082      @Override
083      public HtmlParserSession build() {
084         return new HtmlParserSession(this);
085      }
086
087      @Override /* Overridden from Builder */
088      public Builder debug(Boolean value) {
089         super.debug(value);
090         return this;
091      }
092
093      @Override /* Overridden from Builder */
094      public Builder fileCharset(Charset value) {
095         super.fileCharset(value);
096         return this;
097      }
098
099      @Override /* Overridden from Builder */
100      public Builder javaMethod(Method value) {
101         super.javaMethod(value);
102         return this;
103      }
104
105      @Override /* Overridden from Builder */
106      public Builder locale(Locale value) {
107         super.locale(value);
108         return this;
109      }
110
111      @Override /* Overridden from Builder */
112      public Builder mediaType(MediaType value) {
113         super.mediaType(value);
114         return this;
115      }
116
117      @Override /* Overridden from Builder */
118      public Builder mediaTypeDefault(MediaType value) {
119         super.mediaTypeDefault(value);
120         return this;
121      }
122
123      @Override /* Overridden from Builder */
124      public Builder outer(Object value) {
125         super.outer(value);
126         return this;
127      }
128
129      @Override /* Overridden from Builder */
130      public Builder properties(Map<String,Object> value) {
131         super.properties(value);
132         return this;
133      }
134
135      @Override /* Overridden from Builder */
136      public Builder property(String key, Object value) {
137         super.property(key, value);
138         return this;
139      }
140
141      @Override /* Overridden from Builder */
142      public Builder schema(HttpPartSchema value) {
143         super.schema(value);
144         return this;
145      }
146
147      @Override /* Overridden from Builder */
148      public Builder schemaDefault(HttpPartSchema value) {
149         super.schemaDefault(value);
150         return this;
151      }
152
153      @Override /* Overridden from Builder */
154      public Builder streamCharset(Charset value) {
155         super.streamCharset(value);
156         return this;
157      }
158
159      @Override /* Overridden from Builder */
160      public Builder timeZone(TimeZone value) {
161         super.timeZone(value);
162         return this;
163      }
164
165      @Override /* Overridden from Builder */
166      public Builder timeZoneDefault(TimeZone value) {
167         super.timeZoneDefault(value);
168         return this;
169      }
170
171      @Override /* Overridden from Builder */
172      public Builder unmodifiable() {
173         super.unmodifiable();
174         return this;
175      }
176   }
177
   // Local names of the elements that isWhitespaceElement(XmlReader) reports as whitespace elements.
   private static final Set<String> whitespaceElements = set("br", "bs", "sp", "ff");
179
180   /**
181    * Creates a new builder for this object.
182    *
183    * @param ctx The context creating this session.
184    *    <br>Cannot be <jk>null</jk>.
185    * @return A new builder.
186    */
187   public static Builder create(HtmlParser ctx) {
188      return new Builder(assertArgNotNull("ctx", ctx));
189   }
190
191   private static String getAttribute(XmlReader r, String name, String def) {
192      for (var i = 0; i < r.getAttributeCount(); i++)
193         if (r.getAttributeLocalName(i).equals(name))
194            return r.getAttributeValue(i);
195      return def;
196   }
197
198   private static Map<String,String> getAttributes(XmlReader r) {
199      var m = new TreeMap<String,String>();
200      for (var i = 0; i < r.getAttributeCount(); i++)
201         m.put(r.getAttributeLocalName(i), r.getAttributeValue(i));
202      return m;
203   }
204
205   private static int skipWs(XmlReader r) throws XMLStreamException {
206      int event = r.getEventType();
207      while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace())
208         event = r.next();
209      return event;
210   }
211
   // Parser context taken from the builder; used to look up HTML-specific class and bean-property metadata.
   private final HtmlParser ctx;
213
214   /**
215    * Constructor.
216    *
217    * @param builder The builder for this object.
218    */
219   protected HtmlParserSession(Builder builder) {
220      super(builder);
221      ctx = builder.ctx;
222   }
223
224   /*
225    * Reads the next tag.  Advances past anything that's not a start or end tag.  Throws an exception if
226    *    it's not one of the expected tags.
227    * Precondition:  Must be pointing before the event we want to parse.
228    * Postcondition:  Pointing at the tag just parsed.
229    */
230   private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
231      int et = r.next();
232
233      while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT)
234         et = r.next();
235
236      if (et == END_DOCUMENT)
237         throw new ParseException(this, "Unexpected end of document.");
238
239      var tag = HtmlTag.forEvent(this, r);
240      if (expected.length == 0)
241         return tag;
242      for (var t : expected)
243         if (t == tag)
244            return tag;
245
246      throw new ParseException(this, "Unexpected tag: ''{0}''.  Expected one of the following: {1}", tag, expected);
247   }
248
249   /*
250    * Reads an anchor tag and converts it into a bean.
251    */
252   private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType) throws IOException, ParseException, XMLStreamException {
253      String href = r.getAttributeValue(null, "href");
254      String name = getElementText(r);
255      if (nn(beanType) && getAnnotationProvider().has(HtmlLink.class, beanType)) {
256         var uriProperty = Value.<String>empty();
257         var nameProperty = Value.<String>empty();
258         beanType.forEachAnnotation(HtmlLink.class, x -> ne(x.uriProperty()), x -> uriProperty.set(x.uriProperty()));
259         beanType.forEachAnnotation(HtmlLink.class, x -> ne(x.nameProperty()), x -> nameProperty.set(x.nameProperty()));
260         BeanMap<T> m = newBeanMap(beanType.inner());
261         m.put(uriProperty.orElse(""), href);
262         m.put(nameProperty.orElse(""), name);
263         return m.getBean();
264      }
265      return convertToType(href, beanType);
266   }
267
268   /*
269    * Reads anything starting at the current event.
270    * <p>
271    * Precondition:  Must be pointing at outer START_ELEMENT.
272    * Postcondition:  Pointing at outer END_ELEMENT.
273    */
274   private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
275
276      if (eType == null)
277         eType = (ClassMeta<T>)object();
278      var swap = (ObjectSwap<T,Object>)eType.getSwap(this);
279      var builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this);
280      var sType = (ClassMeta<?>)null;
281      if (nn(builder))
282         sType = builder.getBuilderClassMeta(this);
283      else if (nn(swap))
284         sType = swap.getSwapClassMeta(this);
285      else
286         sType = eType;
287
288      if (sType.isOptional())
289         return (T)opt(parseAnything(eType.getElementType(), r, outer, isRoot, pMeta));
290
291      setCurrentClass(sType);
292
293      int event = r.getEventType();
294      if (event != START_ELEMENT)
295         throw new ParseException(this, "parseAnything must be called on outer start element.");
296
297      if (! isRoot)
298         event = r.next();
299      var isEmpty = (event == END_ELEMENT);
300
301      // Skip until we find a start element, end document, or non-empty text.
302      if (! isEmpty)
303         event = skipWs(r);
304
305      if (event == END_DOCUMENT)
306         throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType);
307
308      // Handle @Html(asXml=true) beans.
309      var hcm = getHtmlClassMeta(sType);
310      if (hcm.getFormat() == HtmlFormat.XML)
311         return super.parseAnything(eType, null, r, outer, false, pMeta);
312
313      var o = (Object)null;
314
315      var isValid = true;
316      var tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
317
318      // If it's not a known tag, then parse it as XML.
319      // Allows us to parse stuff like "<div/>" into HTML5 beans.
320      if (tag == null && event != CHARACTERS)
321         return super.parseAnything(eType, null, r, outer, false, pMeta);
322
323      if (tag == HTML)
324         tag = skipToData(r);
325
326      if (isEmpty) {
327         o = "";
328      } else if (tag == null || tag.isOneOf(BR, BS, FF, SP)) {
329         String text = parseText(r);
330         if (sType.isObject() || sType.isCharSequence())
331            o = text;
332         else if (sType.isChar())
333            o = parseCharacter(text);
334         else if (sType.isBoolean())
335            o = Boolean.parseBoolean(text);
336         else if (sType.isNumber())
337            o = parseNumber(text, (Class<? extends Number>)eType.inner());
338         else if (sType.canCreateNewInstanceFromString(outer))
339            o = sType.newInstanceFromString(outer, text);
340         else
341            isValid = false;
342
343      } else if (tag == STRING || (tag == A && nn(pMeta) && nn(getHtmlBeanPropertyMeta(pMeta).getLink()))) {
344         String text = getElementText(r);
345         if (sType.isObject() || sType.isCharSequence())
346            o = text;
347         else if (sType.isChar())
348            o = parseCharacter(text);
349         else if (sType.canCreateNewInstanceFromString(outer))
350            o = sType.newInstanceFromString(outer, text);
351         else
352            isValid = false;
353         skipTag(r, tag == STRING ? xSTRING : xA);
354
355      } else if (tag == NUMBER) {
356         String text = getElementText(r);
357         if (sType.isObject())
358            o = parseNumber(text, Number.class);
359         else if (sType.isNumber())
360            o = parseNumber(text, (Class<? extends Number>)sType.inner());
361         else
362            isValid = false;
363         skipTag(r, xNUMBER);
364
365      } else if (tag == BOOLEAN) {
366         String text = getElementText(r);
367         if (sType.isObject() || sType.isBoolean())
368            o = Boolean.parseBoolean(text);
369         else
370            isValid = false;
371         skipTag(r, xBOOLEAN);
372
373      } else if (tag == P) {
374         String text = getElementText(r);
375         if (! "No Results".equals(text))
376            isValid = false;
377         skipTag(r, xP);
378
379      } else if (tag == NULL) {
380         skipTag(r, NULL);
381         skipTag(r, xNULL);
382
383      } else if (tag == A) {
384         o = parseAnchor(r, swap == null ? eType : null);
385         skipTag(r, xA);
386
387      } else if (tag == TABLE) {
388
389         String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object");
390         ClassMeta cm = getClassMeta(typeName, pMeta, eType);
391
392         if (nn(cm)) {
393            sType = eType = cm;
394            typeName = sType.isCollectionOrArray() ? "array" : "object";
395         } else if (! "array".equals(typeName)) {
396            // Type name could be a subtype name.
397            typeName = sType.isCollectionOrArray() ? "array" : "object";
398         }
399
400         if (typeName.equals("object")) {
401            if (sType.isObject()) {
402               o = parseIntoMap(r, newGenericMap(sType), sType.getKeyType(), sType.getValueType(), pMeta);
403            } else if (sType.isMap()) {
404               o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer) : newGenericMap(sType)), sType.getKeyType(), sType.getValueType(), pMeta);
405            } else if (nn(builder)) {
406               BeanMap m = toBeanMap(builder.create(this, eType));
407               o = builder.build(this, parseIntoBean(r, m).getBean(), eType);
408            } else if (sType.canCreateNewBean(outer)) {
409               BeanMap m = newBeanMap(outer, sType.inner());
410               o = parseIntoBean(r, m).getBean();
411            } else if (nn(sType.getProxyInvocationHandler())) {
412               BeanMap m = newBeanMap(outer, sType.inner());
413               o = parseIntoBean(r, m).getBean();
414            } else {
415               isValid = false;
416            }
417            skipTag(r, xTABLE);
418
419         } else if (typeName.equals("array")) {
420            if (sType.isObject())
421               o = parseTableIntoCollection(r, (Collection)new JsonList(this), sType, pMeta);
422            else if (sType.isCollection())
423               o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer) : new JsonList(this)), sType, pMeta);
424            else if (sType.isArray() || sType.isArgs()) {
425               var l = (ArrayList)parseTableIntoCollection(r, list(), sType, pMeta);
426               o = toArray(sType, l);
427            } else
428               isValid = false;
429            skipTag(r, xTABLE);
430
431         } else {
432            isValid = false;
433         }
434
435      } else if (tag == UL) {
436         String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array");
437         ClassMeta cm = getClassMeta(typeName, pMeta, eType);
438         if (nn(cm))
439            sType = eType = cm;
440
441         if (sType.isObject())
442            o = parseIntoCollection(r, new JsonList(this), sType, pMeta);
443         else if (sType.isCollection() || sType.isObject())
444            o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer) : new JsonList(this)), sType, pMeta);
445         else if (sType.isArray() || sType.isArgs())
446            o = toArray(sType, parseIntoCollection(r, list(), sType, pMeta));
447         else
448            isValid = false;
449         skipTag(r, xUL);
450
451      }
452
453      if (! isValid)
454         throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType);
455
456      if (nn(swap) && nn(o))
457         o = unswap(swap, o, eType);
458
459      if (nn(outer))
460         setParent(eType, o, outer);
461
462      skipWs(r);
463      return (T)o;
464   }
465
466   /*
467    * Reads contents of <table> element.
468    * Precondition:  Must be pointing at event following <table> event.
469    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
470    */
471   private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws IOException, ParseException, ExecutableException, XMLStreamException {
472      while (true) {
473         HtmlTag tag = nextTag(r, TR, xTABLE);
474         if (tag == xTABLE)
475            break;
476         tag = nextTag(r, TD, TH);
477         // Skip over the column headers.
478         if (tag == TH) {
479            skipTag(r);
480            r.nextTag();
481            skipTag(r);
482         } else {
483            String key = getElementText(r);
484            nextTag(r, TD);
485            var pMeta = m.getPropertyMeta(key);
486            if (pMeta == null) {
487               onUnknownProperty(key, m, parseAnything(object(), r, null, false, null));
488            } else {
489               var cm = pMeta.getClassMeta();
490               Object value = parseAnything(cm, r, m.getBean(false), false, pMeta);
491               setName(cm, value, key);
492               try {
493                  pMeta.set(m, key, value);
494               } catch (BeanRuntimeException e) {
495                  onBeanSetterException(pMeta, e);
496                  throw e;
497               }
498            }
499         }
500         HtmlTag t = nextTag(r, xTD, xTR);
501         if (t == xTD)
502            nextTag(r, xTR);
503      }
504      return m;
505   }
506
507   /*
508    * Reads contents of <ul> element.
509    * Precondition:  Must be pointing at event following <ul> event.
510    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
511    */
512   private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l, ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException {
513      int argIndex = 0;
514      while (true) {
515         HtmlTag tag = nextTag(r, LI, xUL, xLI);
516         if (tag == xLI)
517            tag = nextTag(r, LI, xUL, xLI);
518         if (tag == xUL)
519            break;
520         var elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
521         l.add((E)parseAnything(elementType, r, l, false, pMeta));
522      }
523      return l;
524   }
525
526   /*
527    * Reads contents of <table> element.
528    * Precondition:  Must be pointing at <table> event.
529    * Postcondition:  Pointing at next START_ELEMENT or END_DOCUMENT event.
530    */
531   private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType, ClassMeta<V> valueType, BeanPropertyMeta pMeta)
532      throws IOException, ParseException, ExecutableException, XMLStreamException {
533      while (true) {
534         HtmlTag tag = nextTag(r, TR, xTABLE);
535         if (tag == xTABLE)
536            break;
537         tag = nextTag(r, TD, TH);
538         // Skip over the column headers.
539         if (tag == TH) {
540            skipTag(r);
541            r.nextTag();
542            skipTag(r);
543         } else {
544            K key = parseAnything(keyType, r, m, false, pMeta);
545            nextTag(r, TD);
546            V value = parseAnything(valueType, r, m, false, pMeta);
547            setName(valueType, value, key);
548            m.put(key, value);
549         }
550         tag = nextTag(r, xTD, xTR);
551         if (tag == xTD)
552            nextTag(r, xTR);
553      }
554
555      return m;
556   }
557
   /*
    * Reads the contents of a <table> element into a collection, adding one entry per data row.
    * The first row must be a header row; the text of its <th> cells becomes the list of keys that the cells
    * of every following row are matched against, in order.
    * The reader is advanced with nextTag() before anything is inspected.
    * Returns once the closing </table> tag has been read, leaving the reader on that tag.
    */
   private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l, ClassMeta<E> type, BeanPropertyMeta pMeta)
      throws IOException, ParseException, ExecutableException, XMLStreamException {

      // Header row: collect the column names.
      HtmlTag tag = nextTag(r, TR);
      List<String> keys = list();
      while (true) {
         tag = nextTag(r, TH, xTR);
         if (tag == xTR)
            break;
         keys.add(getElementText(r));
      }

      int argIndex = 0;

      // One iteration per data row; ends at the closing table tag.
      while (true) {
         r.nextTag();
         tag = HtmlTag.forEvent(this, r);
         if (tag == xTABLE)
            break;

         // Resolve the row type: explicit type attribute on the row first, then the collection's
         // argument/element type, and finally the generic object type.
         var elementType = (ClassMeta)null;
         String beanType = getAttribute(r, getBeanTypePropertyName(type), null);
         if (nn(beanType))
            elementType = getClassMeta(beanType, pMeta, null);
         if (elementType == null)
            elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType();
         if (elementType == null)
            elementType = object();

         BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this);

         if (nn(builder) || elementType.canCreateNewBean(l)) {
            // Row is parsed as a bean (optionally through its builder).
            // @formatter:off
            BeanMap m =
               nn(builder)
               ? toBeanMap(builder.create(this, elementType))
               : newBeanMap(l, elementType.inner())
            ;
            // @formatter:on
            for (var key : keys) {
               tag = nextTag(r, xTD, TD, NULL);
               if (tag == xTD)
                  tag = nextTag(r, TD, NULL);
               // A <null> element in place of a cell makes the whole row a null entry.
               if (tag == NULL) {
                  m = null;
                  nextTag(r, xNULL);
                  break;
               }
               BeanMapEntry e = m.getProperty(key);
               if (e == null) {
                  // No such property: the cell is still parsed, but its value is discarded.
                  parseAnything(object(), r, l, false, null);
               } else {
                  BeanPropertyMeta bpm = e.getMeta();
                  var cm = bpm.getClassMeta();
                  Object value = parseAnything(cm, r, m.getBean(false), false, bpm);
                  setName(cm, value, key);
                  bpm.set(m, key, value);
               }
            }
            // @formatter:off
            l.add(
               m == null
               ? null
               : nn(builder)
                  ? builder.build(this, m.getBean(), elementType)
                  : (E)m.getBean()
            );
            // @formatter:on
         } else {
            // Row is parsed as a map keyed by the column names.
            // 'c' is the value of the type attribute on the row, if any.
            String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType()));
            var m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l) : newGenericMap(elementType));
            for (var key : keys) {
               tag = nextTag(r, TD, NULL);
               // A <null> element in place of a cell makes the whole row a null entry.
               if (tag == NULL) {
                  m = null;
                  nextTag(r, xNULL);
                  break;
               }
               if (nn(m)) {
                  var kt = elementType.getKeyType();
                  var vt = elementType.getValueType();
                  Object value = parseAnything(vt, r, l, false, pMeta);
                  setName(vt, value, key);
                  m.put(convertToType(key, kt), value);
               }
            }
            if (nn(m) && nn(c)) {
               // Put the type name into the map and cast the map to the row type.
               var m2 = (m instanceof JsonMap ? (JsonMap)m : new JsonMap(m).session(this));
               m2.put(getBeanTypePropertyName(type.getElementType()), c);
               l.add((E)cast(m2, pMeta, elementType));
            } else {
               if (m instanceof JsonMap m2)
                  l.add((E)convertToType(m2, elementType));
               else
                  l.add((E)m);
            }
         }
         nextTag(r, xTR);
      }
      return l;
   }
664
665   /*
666    * Skips over the current element and advances to the next element.
667    * <p>
668    * Precondition:  Pointing to opening tag.
669    * Postcondition:  Pointing to next opening tag.
670    *
671    * @param r The stream being read from.
672    * @throws XMLStreamException
673    */
674   private void skipTag(XmlReader r) throws ParseException, XMLStreamException {
675      int et = r.getEventType();
676
677      if (et != START_ELEMENT)
678         throw new ParseException(this, "skipToNextTag() call on invalid event ''{0}''.  Must only be called on START_ELEMENT events.", XmlUtils.toReadableEvent(r));
679
680      String n = r.getLocalName();
681
682      int depth = 0;
683      while (true) {
684         et = r.next();
685         if (et == START_ELEMENT) {
686            String n2 = r.getLocalName();
687            if (n.equals(n2))
688               depth++;
689         } else if (et == END_ELEMENT) {
690            String n2 = r.getLocalName();
691            if (n.equals(n2))
692               depth--;
693            if (depth < 0)
694               return;
695         }
696      }
697   }
698
699   private void skipTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException {
700      var tag = HtmlTag.forEvent(this, r);
701      if (tag.isOneOf(expected))
702         r.next();
703      else
704         throw new ParseException(this, "Unexpected tag: ''{0}''.  Expected one of the following: {1}", tag, expected);
705   }
706
707   /*
708    * For parsing output from HtmlDocSerializer, this skips over the head, title, and links.
709    */
710   private HtmlTag skipToData(XmlReader r) throws ParseException, XMLStreamException {
711      while (true) {
712         var event = r.next();
713         if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) {
714            r.nextTag();
715            event = r.getEventType();
716            var isEmpty = (event == END_ELEMENT);
717            // Skip until we find a start element, end document, or non-empty text.
718            if (! isEmpty)
719               event = skipWs(r);
720            if (event == END_DOCUMENT)
721               throw new ParseException(this, "Unexpected end of stream looking for data.");
722            return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false));
723         }
724      }
725   }
726
727   @Override /* Overridden from ParserSession */
728   protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException {
729      try {
730         return parseAnything(type, getXmlReader(pipe), getOuter(), true, null);
731      } catch (XMLStreamException e) {
732         throw new ParseException(e);
733      }
734   }
735
736   @Override /* Overridden from ReaderParserSession */
737   protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType) throws Exception {
738      return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null);
739   }
740
741   @Override /* Overridden from ReaderParserSession */
742   protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType) throws Exception {
743      return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType), (ClassMeta<V>)getClassMeta(valueType), null);
744   }
745
746   /**
747    * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag.
748    *
749    * <p>
750    * Precondition:  Pointing to opening tag.
751    * Postcondition:  Pointing to closing tag.
752    *
753    * @param r The stream being read from.
754    * @return The parsed string.
755    * @throws XMLStreamException Thrown by underlying XML stream.
756    * @throws ParseException Malformed input encountered.
757    */
758   @Override /* Overridden from XmlParserSession */
759   protected String getElementText(XmlReader r) throws IOException, XMLStreamException, ParseException {
760      r.next();
761      return parseText(r);
762   }
763
764   /**
765    * Returns the language-specific metadata on the specified bean property.
766    *
767    * @param bpm The bean property to return the metadata on.
768    * @return The metadata.
769    */
770   protected HtmlBeanPropertyMeta getHtmlBeanPropertyMeta(BeanPropertyMeta bpm) {
771      return ctx.getHtmlBeanPropertyMeta(bpm);
772   }
773
774   /**
775    * Returns the language-specific metadata on the specified class.
776    *
777    * @param cm The class to return the metadata on.
778    * @return The metadata.
779    */
780   protected HtmlClassMeta getHtmlClassMeta(ClassMeta<?> cm) {
781      return ctx.getHtmlClassMeta(cm);
782   }
783
784   @Override /* Overridden from XmlParserSession */
785   protected boolean isWhitespaceElement(XmlReader r) {
786      String s = r.getLocalName();
787      return whitespaceElements.contains(s);
788   }
789
790   /**
791    * Parses CHARACTERS data.
792    *
793    * <p>
794    * Precondition:  Pointing to event immediately following opening tag.
795    * Postcondition:  Pointing to closing tag.
796    *
797    * @param r The stream being read from.
798    * @return The parsed string.
799    * @throws XMLStreamException Thrown by underlying XML stream.
800    */
801   @Override /* Overridden from XmlParserSession */
802   protected String parseText(XmlReader r) throws IOException, ParseException, XMLStreamException {
803
804      StringBuilder sb = getStringBuilder();
805
806      int et = r.getEventType();
807      if (et == END_ELEMENT)
808         return "";
809
810      int depth = 0;
811
812      var characters = (String)null;
813
814      while (true) {
815         if (et == START_ELEMENT) {
816            if (nn(characters)) {
817               if (sb.length() == 0)
818                  characters = trimStart(characters);
819               sb.append(characters);
820               characters = null;
821            }
822            var tag = HtmlTag.forEvent(this, r);
823            if (tag == BR) {
824               sb.append('\n');
825               r.nextTag();
826            } else if (tag == BS) {
827               sb.append('\b');
828               r.nextTag();
829            } else if (tag == SP) {
830               et = r.next();
831               if (et == CHARACTERS) {
832                  String s = r.getText();
833                  if (ne(s)) {
834                     var c = r.getText().charAt(0);
835                     if (c == '\u2003')
836                        c = '\t';
837                     sb.append(c);
838                  }
839                  r.nextTag();
840               }
841            } else if (tag == FF) {
842               sb.append('\f');
843               r.nextTag();
844            } else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) {
845               et = r.next();
846               if (et == CHARACTERS) {
847                  sb.append(r.getText());
848                  r.nextTag();
849               }
850            } else {
851               sb.append('<').append(r.getLocalName());
852               for (var i = 0; i < r.getAttributeCount(); i++)
853                  sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\'');
854               sb.append('>');
855               depth++;
856            }
857         } else if (et == END_ELEMENT) {
858            if (nn(characters)) {
859               if (sb.length() == 0)
860                  characters = trimStart(characters);
861               if (depth == 0)
862                  characters = trimEnd(characters);
863               sb.append(characters);
864               characters = null;
865            }
866            if (depth == 0)
867               break;
868            sb.append('<').append(r.getLocalName()).append('>');
869            depth--;
870         } else if (et == CHARACTERS) {
871            characters = r.getText();
872         }
873         et = r.next();
874      }
875
876      String s = trim(sb.toString());
877      returnStringBuilder(sb);
878      return s;
879   }
880
881   @Override /* Overridden from XmlParserSession */
882   protected String parseWhitespaceElement(XmlReader r) throws IOException, ParseException, XMLStreamException {
883
884      var tag = HtmlTag.forEvent(this, r);
885      int et = r.next();
886      if (tag == BR) {
887         return "\n";
888      } else if (tag == BS) {
889         return "\b";
890      } else if (tag == FF) {
891         return "\f";
892      } else if (tag == SP) {
893         if (et == CHARACTERS) {
894            String s = r.getText();
895            if (s.charAt(0) == '\u2003')
896               s = "\t";
897            r.next();
898            return decodeString(s);
899         }
900         return "";
901      } else {
902         throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag);
903      }
904   }
905}