001// *************************************************************************************************************************** 002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file * 003// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * 004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance * 005// * with the License. You may obtain a copy of the License at * 006// * * 007// * http://www.apache.org/licenses/LICENSE-2.0 * 008// * * 009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an * 010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * 011// * specific language governing permissions and limitations under the License. * 012// *************************************************************************************************************************** 013package org.apache.juneau.uon; 014 015import java.io.*; 016 017import org.apache.juneau.parser.*; 018 019/** 020 * Same functionality as {@link ParserReader} except automatically decoded <c>%xx</c> escape sequences. 021 * 022 * <p> 023 * Escape sequences are assumed to be encoded UTF-8. Extended Unicode (>\u10000) is supported. 024 * 025 * <p> 026 * If decoding is enabled, the following character replacements occur so that boundaries are not lost: 027 * <ul> 028 * <li><js>'&'</js> -> <js>'\u0001'</js> 029 * <li><js>'='</js> -> <js>'\u0002'</js> 030 * </ul> 031 */ 032public final class UonReader extends ParserReader { 033 034 private final boolean decodeChars; 035 private final char[] buff; 036 037 // Writable properties. 038 private int iCurrent, iEnd; 039 040 041 /** 042 * Constructor. 043 * 044 * @param pipe The parser input. 045 * @param decodeChars Whether the input is URL-encoded. 046 * @throws IOException Thrown by underlying stream. 047 */ 048 public UonReader(ParserPipe pipe, boolean decodeChars) throws IOException { 049 super(pipe); 050 this.decodeChars = decodeChars; 051 if (pipe.isString()) { 052 String in = pipe.getInputAsString(); 053 this.buff = new char[in.length() < 1024 ? in.length() : 1024]; 054 } else { 055 this.buff = new char[1024]; 056 } 057 } 058 059 @Override /* Reader */ 060 public final int read(char[] cbuf, int off, int len) throws IOException { 061 062 if (! decodeChars) 063 return super.read(cbuf, off, len); 064 065 // Copy any remainder to the beginning of the buffer. 066 int remainder = iEnd - iCurrent; 067 if (remainder > 0) 068 System.arraycopy(buff, iCurrent, buff, 0, remainder); 069 iCurrent = 0; 070 071 int expected = buff.length - remainder; 072 073 int x = super.read(buff, remainder, expected); 074 if (x == -1 && remainder == 0) 075 return -1; 076 077 iEnd = remainder + (x == -1 ? 0 : x); 078 079 int i = 0; 080 while (i < len) { 081 if (iCurrent >= iEnd) 082 return i; 083 char c = buff[iCurrent++]; 084 if (c == '+') { 085 cbuf[off + i++] = ' '; 086 } else if (c == '&') { 087 cbuf[off + i++] = '\u0001'; 088 } else if (c == '=') { 089 cbuf[off + i++] = '\u0002'; 090 } else if (c != '%') { 091 cbuf[off + i++] = c; 092 } else { 093 int iMark = iCurrent-1; // Keep track of current position. 094 095 // Stop if there aren't at least two more characters following '%' in the buffer, 096 // or there aren't at least two more positions open in cbuf to handle double-char chars. 097 if (iMark+2 >= iEnd || i+2 > len) { 098 iCurrent--; 099 return i; 100 } 101 102 int b0 = readEncodedByte(); 103 int cx; 104 105 // 0xxxxxxx 106 if (b0 < 128) { 107 cx = b0; 108 109 // 10xxxxxx 110 } else if (b0 < 192) { 111 throw new IOException("Invalid hex value for first escape pattern in UTF-8 sequence: " + b0); 112 113 // 110xxxxx 10xxxxxx 114 // 11000000(192) - 11011111(223) 115 } else if (b0 < 224) { 116 cx = readUTF8(b0-192, 1); 117 if (cx == -1) { 118 iCurrent = iMark; 119 return i; 120 } 121 122 // 1110xxxx 10xxxxxx 10xxxxxx 123 // 11100000(224) - 11101111(239) 124 } else if (b0 < 240) { 125 cx = readUTF8(b0-224, 2); 126 if (cx == -1) { 127 iCurrent = iMark; 128 return i; 129 } 130 131 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 132 // 11110000(240) - 11110111(247) 133 } else if (b0 < 248) { 134 cx = readUTF8(b0-240, 3); 135 if (cx == -1) { 136 iCurrent = iMark; 137 return i; 138 } 139 140 } else 141 throw new IOException("Invalid hex value for first escape pattern in UTF-8 sequence: " + b0); 142 143 if (cx < 0x10000) 144 cbuf[off + i++] = (char)cx; 145 else { 146 cx -= 0x10000; 147 cbuf[off + i++] = (char)(0xd800 + (cx >> 10)); 148 cbuf[off + i++] = (char)(0xdc00 + (cx & 0x3ff)); 149 } 150 } 151 } 152 return i; 153 } 154 155 private int readUTF8(int n, final int numBytes) throws IOException { 156 if (iCurrent + numBytes*3 > iEnd) 157 return -1; 158 for (int i = 0; i < numBytes; i++) { 159 n <<= 6; 160 n += readHex()-128; 161 } 162 return n; 163 } 164 165 private int readHex() throws IOException { 166 int c = buff[iCurrent++]; 167 if (c != '%') 168 throw new IOException("Did not find expected '%' character in UTF-8 sequence."); 169 return readEncodedByte(); 170 } 171 172 private int readEncodedByte() throws IOException { 173 if (iEnd <= iCurrent + 1) 174 throw new IOException("Incomplete trailing escape pattern"); 175 int h = buff[iCurrent++]; 176 int l = buff[iCurrent++]; 177 h = fromHexChar(h); 178 l = fromHexChar(l); 179 return (h << 4) + l; 180 } 181 182 private static int fromHexChar(int c) throws IOException { 183 if (c >= '0' && c <= '9') 184 return c - '0'; 185 if (c >= 'a' && c <= 'f') 186 return 10 + c - 'a'; 187 if (c >= 'A' && c <= 'F') 188 return 10 + c - 'A'; 189 throw new IOException("Invalid hex character '"+c+"' found in escape pattern."); 190 } 191 192 @Override /* ParserReader */ 193 public final UonReader unread() throws IOException { 194 super.unread(); 195 return this; 196 } 197}