001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.juneau.uon; 018 019import java.io.*; 020 021import org.apache.juneau.parser.*; 022 023/** 024 * Same functionality as {@link ParserReader} except automatically decoded <c>%xx</c> escape sequences. 025 * 026 * <p> 027 * Escape sequences are assumed to be encoded UTF-8. Extended Unicode (>\u10000) is supported. 028 * 029 * <p> 030 * If decoding is enabled, the following character replacements occur so that boundaries are not lost: 031 * <ul> 032 * <li><js>'&'</js> -> <js>'\u0001'</js> 033 * <li><js>'='</js> -> <js>'\u0002'</js> 034 * </ul> 035 * 036 * <h5 class='section'>See Also:</h5><ul> 037 * <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/UonBasics">UON Basics</a> 038 039 * </ul> 040 */ 041public class UonReader extends ParserReader { 042 043 private final boolean decodeChars; 044 private final char[] buff; 045 046 // Writable properties. 047 private int iCurrent, iEnd; 048 049 050 /** 051 * Constructor. 052 * 053 * @param pipe The parser input. 054 * @param decodeChars Whether the input is URL-encoded. 055 * @throws IOException Thrown by underlying stream. 056 */ 057 public UonReader(ParserPipe pipe, boolean decodeChars) throws IOException { 058 super(pipe); 059 this.decodeChars = decodeChars; 060 if (pipe.isString()) { 061 String in = pipe.getInputAsString(); 062 this.buff = new char[in.length() < 1024 ? in.length() : 1024]; 063 } else { 064 this.buff = new char[1024]; 065 } 066 } 067 068 @Override /* Reader */ 069 public int read(char[] cbuf, int off, int len) throws IOException { 070 071 if (! decodeChars) 072 return super.read(cbuf, off, len); 073 074 // Copy any remainder to the beginning of the buffer. 075 int remainder = iEnd - iCurrent; 076 if (remainder > 0) 077 System.arraycopy(buff, iCurrent, buff, 0, remainder); 078 iCurrent = 0; 079 080 int expected = buff.length - remainder; 081 082 int x = super.read(buff, remainder, expected); 083 if (x == -1 && remainder == 0) 084 return -1; 085 086 iEnd = remainder + (x == -1 ? 0 : x); 087 088 int i = 0; 089 while (i < len) { 090 if (iCurrent >= iEnd) 091 return i; 092 char c = buff[iCurrent++]; 093 if (c == '+') { 094 cbuf[off + i++] = ' '; 095 } else if (c == '&') { 096 cbuf[off + i++] = '\u0001'; 097 } else if (c == '=') { 098 cbuf[off + i++] = '\u0002'; 099 } else if (c != '%') { 100 cbuf[off + i++] = c; 101 } else { 102 int iMark = iCurrent-1; // Keep track of current position. 103 104 // Stop if there aren't at least two more characters following '%' in the buffer, 105 // or there aren't at least two more positions open in cbuf to handle double-char chars. 106 if (iMark+2 >= iEnd || i+2 > len) { 107 iCurrent--; 108 return i; 109 } 110 111 int b0 = readEncodedByte(); 112 int cx; 113 114 // 0xxxxxxx 115 if (b0 < 128) { 116 cx = b0; 117 118 // 10xxxxxx 119 } else if (b0 < 192) { 120 throw new IOException("Invalid hex value for first escape pattern in UTF-8 sequence: "+b0); 121 122 // 110xxxxx 10xxxxxx 123 // 11000000(192) - 11011111(223) 124 } else if (b0 < 224) { 125 cx = readUTF8(b0-192, 1); 126 if (cx == -1) { 127 iCurrent = iMark; 128 return i; 129 } 130 131 // 1110xxxx 10xxxxxx 10xxxxxx 132 // 11100000(224) - 11101111(239) 133 } else if (b0 < 240) { 134 cx = readUTF8(b0-224, 2); 135 if (cx == -1) { 136 iCurrent = iMark; 137 return i; 138 } 139 140 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 141 // 11110000(240) - 11110111(247) 142 } else if (b0 < 248) { 143 cx = readUTF8(b0-240, 3); 144 if (cx == -1) { 145 iCurrent = iMark; 146 return i; 147 } 148 149 } else 150 throw new IOException("Invalid hex value for first escape pattern in UTF-8 sequence: "+b0); 151 152 if (cx < 0x10000) 153 cbuf[off + i++] = (char)cx; 154 else { 155 cx -= 0x10000; 156 cbuf[off + i++] = (char)(0xd800 + (cx >> 10)); 157 cbuf[off + i++] = (char)(0xdc00 + (cx & 0x3ff)); 158 } 159 } 160 } 161 return i; 162 } 163 164 private int readUTF8(int n, final int numBytes) throws IOException { 165 if (iCurrent + numBytes*3 > iEnd) 166 return -1; 167 for (int i = 0; i < numBytes; i++) { 168 n <<= 6; 169 n += readHex()-128; 170 } 171 return n; 172 } 173 174 private int readHex() throws IOException { 175 int c = buff[iCurrent++]; 176 if (c != '%') 177 throw new IOException("Did not find expected '%' character in UTF-8 sequence."); 178 return readEncodedByte(); 179 } 180 181 private int readEncodedByte() throws IOException { 182 if (iEnd <= iCurrent + 1) 183 throw new IOException("Incomplete trailing escape pattern"); 184 int h = buff[iCurrent++]; 185 int l = buff[iCurrent++]; 186 h = fromHexChar(h); 187 l = fromHexChar(l); 188 return (h << 4) + l; 189 } 190 191 private static int fromHexChar(int c) throws IOException { 192 if (c >= '0' && c <= '9') 193 return c - '0'; 194 if (c >= 'a' && c <= 'f') 195 return 10 + c - 'a'; 196 if (c >= 'A' && c <= 'F') 197 return 10 + c - 'A'; 198 throw new IOException("Invalid hex character '"+c+"' found in escape pattern."); 199 } 200 201 @Override /* ParserReader */ 202 public UonReader unread() throws IOException { 203 super.unread(); 204 return this; 205 } 206}