001// ***************************************************************************************************************************
002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
003// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
005// * with the License.  You may obtain a copy of the License at                                                              *
006// *                                                                                                                         *
007// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
008// *                                                                                                                         *
009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
011// * specific language governing permissions and limitations under the License.                                              *
012// ***************************************************************************************************************************
013package org.apache.juneau.uon;
014
015import java.io.*;
016
017import org.apache.juneau.parser.*;
018
/**
 * Same functionality as {@link ParserReader} except that it automatically decodes <c>%xx</c> escape sequences.
 *
 * <p>
 * Escape sequences are assumed to be encoded UTF-8.  Extended Unicode (code points U+10000 and above) is supported.
 *
 * <p>
 * If decoding is enabled, the following character replacements occur so that boundaries are not lost:
 * <ul>
 *    <li><js>'&amp;'</js> -&gt; <js>'\u0001'</js>
 *    <li><js>'='</js> -&gt; <js>'\u0002'</js>
 * </ul>
 *
 * <h5 class='section'>See Also:</h5><ul>
 *    <li class='link'><a class="doclink" href="../../../../index.html#jm.UonDetails">UON Details</a>
 * </ul>
 */
037public final class UonReader extends ParserReader {
038
039   private final boolean decodeChars;
040   private final char[] buff;
041
042   // Writable properties.
043   private int iCurrent, iEnd;
044
045
046   /**
047    * Constructor.
048    *
049    * @param pipe The parser input.
050    * @param decodeChars Whether the input is URL-encoded.
051    * @throws IOException Thrown by underlying stream.
052    */
053   public UonReader(ParserPipe pipe, boolean decodeChars) throws IOException {
054      super(pipe);
055      this.decodeChars = decodeChars;
056      if (pipe.isString()) {
057         String in = pipe.getInputAsString();
058         this.buff = new char[in.length() < 1024 ? in.length() : 1024];
059      } else {
060         this.buff = new char[1024];
061      }
062   }
063
064   @Override /* Reader */
065   public final int read(char[] cbuf, int off, int len) throws IOException {
066
067      if (! decodeChars)
068         return super.read(cbuf, off, len);
069
070      // Copy any remainder to the beginning of the buffer.
071      int remainder = iEnd - iCurrent;
072      if (remainder > 0)
073         System.arraycopy(buff, iCurrent, buff, 0, remainder);
074      iCurrent = 0;
075
076      int expected = buff.length - remainder;
077
078      int x = super.read(buff, remainder, expected);
079      if (x == -1 && remainder == 0)
080         return -1;
081
082      iEnd = remainder + (x == -1 ? 0 : x);
083
084      int i = 0;
085      while (i < len) {
086         if (iCurrent >= iEnd)
087            return i;
088         char c = buff[iCurrent++];
089         if (c == '+') {
090            cbuf[off + i++] = ' ';
091         } else if (c == '&') {
092            cbuf[off + i++] = '\u0001';
093         } else if (c == '=') {
094            cbuf[off + i++] = '\u0002';
095         } else if (c != '%') {
096            cbuf[off + i++] = c;
097         } else {
098            int iMark = iCurrent-1;  // Keep track of current position.
099
100            // Stop if there aren't at least two more characters following '%' in the buffer,
101            // or there aren't at least two more positions open in cbuf to handle double-char chars.
102            if (iMark+2 >= iEnd || i+2 > len) {
103               iCurrent--;
104               return i;
105            }
106
107            int b0 = readEncodedByte();
108            int cx;
109
110            // 0xxxxxxx
111            if (b0 < 128) {
112               cx = b0;
113
114            // 10xxxxxx
115            } else if (b0 < 192) {
116               throw new IOException("Invalid hex value for first escape pattern in UTF-8 sequence:  "+b0);
117
118            // 110xxxxx 10xxxxxx
119            // 11000000(192) - 11011111(223)
120            } else if (b0 < 224) {
121               cx = readUTF8(b0-192, 1);
122               if (cx == -1) {
123                  iCurrent = iMark;
124                  return i;
125               }
126
127            // 1110xxxx 10xxxxxx 10xxxxxx
128            // 11100000(224) - 11101111(239)
129            } else if (b0 < 240) {
130               cx = readUTF8(b0-224, 2);
131               if (cx == -1) {
132                  iCurrent = iMark;
133                  return i;
134               }
135
136            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
137            // 11110000(240) - 11110111(247)
138            } else if (b0 < 248) {
139               cx = readUTF8(b0-240, 3);
140               if (cx == -1) {
141                  iCurrent = iMark;
142                  return i;
143               }
144
145            } else
146               throw new IOException("Invalid hex value for first escape pattern in UTF-8 sequence:  "+b0);
147
148            if (cx < 0x10000)
149               cbuf[off + i++] = (char)cx;
150            else {
151               cx -= 0x10000;
152               cbuf[off + i++] = (char)(0xd800 + (cx >> 10));
153               cbuf[off + i++] = (char)(0xdc00 + (cx & 0x3ff));
154            }
155         }
156      }
157      return i;
158   }
159
160   private int readUTF8(int n, final int numBytes) throws IOException {
161      if (iCurrent + numBytes*3 > iEnd)
162         return -1;
163      for (int i = 0; i < numBytes; i++) {
164         n <<= 6;
165         n += readHex()-128;
166      }
167      return n;
168   }
169
170   private int readHex() throws IOException {
171      int c = buff[iCurrent++];
172      if (c != '%')
173         throw new IOException("Did not find expected '%' character in UTF-8 sequence.");
174      return readEncodedByte();
175   }
176
177   private int readEncodedByte() throws IOException {
178      if (iEnd <= iCurrent + 1)
179         throw new IOException("Incomplete trailing escape pattern");
180      int h = buff[iCurrent++];
181      int l = buff[iCurrent++];
182      h = fromHexChar(h);
183      l = fromHexChar(l);
184      return (h << 4) + l;
185   }
186
187   private static int fromHexChar(int c) throws IOException {
188      if (c >= '0' && c <= '9')
189         return c - '0';
190      if (c >= 'a' && c <= 'f')
191         return 10 + c - 'a';
192      if (c >= 'A' && c <= 'F')
193         return 10 + c - 'A';
194      throw new IOException("Invalid hex character '"+c+"' found in escape pattern.");
195   }
196
197   @Override /* ParserReader */
198   public final UonReader unread() throws IOException {
199      super.unread();
200      return this;
201   }
202}