001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.uon;
018
019import java.io.*;
020
021import org.apache.juneau.parser.*;
022
023/**
 * Same functionality as {@link ParserReader} except automatically decodes <c>%xx</c> escape sequences.
025 *
026 * <p>
 * Escape sequences are assumed to be encoded UTF-8.  Supplementary Unicode characters (U+10000 and above) are supported.
028 *
029 * <p>
030 * If decoding is enabled, the following character replacements occur so that boundaries are not lost:
031 * <ul>
 *    <li><js>'&amp;'</js> -&gt; the control character <c>U+0001</c>
 *    <li><js>'='</js> -&gt; the control character <c>U+0002</c>
034 * </ul>
035 *
036 * <h5 class='section'>See Also:</h5><ul>
037 *    <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/UonBasics">UON Basics</a>
038
039 * </ul>
040 */
041public class UonReader extends ParserReader {
042
043   private final boolean decodeChars;
044   private final char[] buff;
045
046   // Writable properties.
047   private int iCurrent, iEnd;
048
049
050   /**
051    * Constructor.
052    *
053    * @param pipe The parser input.
054    * @param decodeChars Whether the input is URL-encoded.
055    * @throws IOException Thrown by underlying stream.
056    */
057   public UonReader(ParserPipe pipe, boolean decodeChars) throws IOException {
058      super(pipe);
059      this.decodeChars = decodeChars;
060      if (pipe.isString()) {
061         String in = pipe.getInputAsString();
062         this.buff = new char[in.length() < 1024 ? in.length() : 1024];
063      } else {
064         this.buff = new char[1024];
065      }
066   }
067
068   @Override /* Reader */
069   public int read(char[] cbuf, int off, int len) throws IOException {
070
071      if (! decodeChars)
072         return super.read(cbuf, off, len);
073
074      // Copy any remainder to the beginning of the buffer.
075      int remainder = iEnd - iCurrent;
076      if (remainder > 0)
077         System.arraycopy(buff, iCurrent, buff, 0, remainder);
078      iCurrent = 0;
079
080      int expected = buff.length - remainder;
081
082      int x = super.read(buff, remainder, expected);
083      if (x == -1 && remainder == 0)
084         return -1;
085
086      iEnd = remainder + (x == -1 ? 0 : x);
087
088      int i = 0;
089      while (i < len) {
090         if (iCurrent >= iEnd)
091            return i;
092         char c = buff[iCurrent++];
093         if (c == '+') {
094            cbuf[off + i++] = ' ';
095         } else if (c == '&') {
096            cbuf[off + i++] = '\u0001';
097         } else if (c == '=') {
098            cbuf[off + i++] = '\u0002';
099         } else if (c != '%') {
100            cbuf[off + i++] = c;
101         } else {
102            int iMark = iCurrent-1;  // Keep track of current position.
103
104            // Stop if there aren't at least two more characters following '%' in the buffer,
105            // or there aren't at least two more positions open in cbuf to handle double-char chars.
106            if (iMark+2 >= iEnd || i+2 > len) {
107               iCurrent--;
108               return i;
109            }
110
111            int b0 = readEncodedByte();
112            int cx;
113
114            // 0xxxxxxx
115            if (b0 < 128) {
116               cx = b0;
117
118            // 10xxxxxx
119            } else if (b0 < 192) {
120               throw new IOException("Invalid hex value for first escape pattern in UTF-8 sequence:  "+b0);
121
122            // 110xxxxx 10xxxxxx
123            // 11000000(192) - 11011111(223)
124            } else if (b0 < 224) {
125               cx = readUTF8(b0-192, 1);
126               if (cx == -1) {
127                  iCurrent = iMark;
128                  return i;
129               }
130
131            // 1110xxxx 10xxxxxx 10xxxxxx
132            // 11100000(224) - 11101111(239)
133            } else if (b0 < 240) {
134               cx = readUTF8(b0-224, 2);
135               if (cx == -1) {
136                  iCurrent = iMark;
137                  return i;
138               }
139
140            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
141            // 11110000(240) - 11110111(247)
142            } else if (b0 < 248) {
143               cx = readUTF8(b0-240, 3);
144               if (cx == -1) {
145                  iCurrent = iMark;
146                  return i;
147               }
148
149            } else
150               throw new IOException("Invalid hex value for first escape pattern in UTF-8 sequence:  "+b0);
151
152            if (cx < 0x10000)
153               cbuf[off + i++] = (char)cx;
154            else {
155               cx -= 0x10000;
156               cbuf[off + i++] = (char)(0xd800 + (cx >> 10));
157               cbuf[off + i++] = (char)(0xdc00 + (cx & 0x3ff));
158            }
159         }
160      }
161      return i;
162   }
163
164   private int readUTF8(int n, final int numBytes) throws IOException {
165      if (iCurrent + numBytes*3 > iEnd)
166         return -1;
167      for (int i = 0; i < numBytes; i++) {
168         n <<= 6;
169         n += readHex()-128;
170      }
171      return n;
172   }
173
174   private int readHex() throws IOException {
175      int c = buff[iCurrent++];
176      if (c != '%')
177         throw new IOException("Did not find expected '%' character in UTF-8 sequence.");
178      return readEncodedByte();
179   }
180
181   private int readEncodedByte() throws IOException {
182      if (iEnd <= iCurrent + 1)
183         throw new IOException("Incomplete trailing escape pattern");
184      int h = buff[iCurrent++];
185      int l = buff[iCurrent++];
186      h = fromHexChar(h);
187      l = fromHexChar(l);
188      return (h << 4) + l;
189   }
190
191   private static int fromHexChar(int c) throws IOException {
192      if (c >= '0' && c <= '9')
193         return c - '0';
194      if (c >= 'a' && c <= 'f')
195         return 10 + c - 'a';
196      if (c >= 'A' && c <= 'F')
197         return 10 + c - 'A';
198      throw new IOException("Invalid hex character '"+c+"' found in escape pattern.");
199   }
200
201   @Override /* ParserReader */
202   public UonReader unread() throws IOException {
203      super.unread();
204      return this;
205   }
206}