001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.juneau.msgpack; 018 019import static org.apache.juneau.commons.utils.IoUtils.*; 020import static org.apache.juneau.commons.utils.ThrowableUtils.*; 021import static org.apache.juneau.msgpack.DataType.*; 022 023import java.io.*; 024 025import org.apache.juneau.parser.*; 026 027/** 028 * Specialized input stream for parsing MessagePack streams. 029 * 030 * <h5 class='section'>Notes:</h5><ul> 031 * <li class='note'> 032 * This class is not intended for external use. 033 * </ul> 034 * 035 * <h5 class='section'>See Also:</h5><ul> 036 * <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/MessagePackBasics">MessagePack Basics</a> 037 038 * </ul> 039 */ 040public class MsgPackInputStream extends ParserInputStream { 041 042 // Data type quick-lookup table. 043 // @formatter:off 044 private static final DataType[] TYPES = { 045 /*0x0?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT, 046 /*0x1?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT, 047 /*0x2?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT, 048 /*0x3?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT, 049 /*0x4?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT, 050 /*0x5?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT, 051 /*0x6?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT, 052 /*0x7?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT, 053 /*0x8?*/ MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP, 054 /*0x9?*/ ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY, 055 /*0xA?*/ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING, 056 /*0xB?*/ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING, 057 /*0xC?*/ NULL, INVALID, BOOLEAN, BOOLEAN, BIN, BIN, BIN, EXT, EXT, EXT, FLOAT, DOUBLE, INT, INT, LONG, LONG, 058 /*0xD?*/ INT, INT, INT, LONG, EXT, EXT, EXT, EXT, EXT, STRING, STRING, STRING, ARRAY, ARRAY, MAP, MAP, 059 /*0xE?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT, 060 /*0xF?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT 061 }; 062 // @formatter:on 063 private DataType currentDataType; 064 private long length; 065 private int lastByte; 066 private int extType; 067 068 int pos = 0; 069 070 /** 071 * Constructor. 072 * 073 * @param pipe The parser input. 074 * @throws IOException Thrown by underlying stream. 075 */ 076 protected MsgPackInputStream(ParserPipe pipe) throws IOException { 077 super(pipe); 078 } 079 080 /** 081 * Read one byte from the stream. 082 */ 083 private int readUInt1() throws IOException { 084 return read(); 085 } 086 087 /** 088 * Read two bytes from the stream. 089 */ 090 private int readUInt2() throws IOException { 091 return (read() << 8) | read(); 092 } 093 094 /** 095 * Read four bytes from the stream. 096 */ 097 private long readUInt4() throws IOException { 098 long l = read(); 099 l <<= 8; 100 l |= read(); 101 l <<= 8; 102 l |= read(); 103 l <<= 8; 104 l |= read(); 105 return l; 106 } 107 108 /** 109 * Return the extended-format type. 110 * Currently not used. 111 */ 112 int getExtType() { return extType; } 113 114 /** 115 * Read a binary field from the stream. 116 */ 117 byte[] readBinary() throws IOException { 118 var b = new byte[(int)length]; 119 read(b); 120 return b; 121 } 122 123 /** 124 * Read a boolean from the stream. 125 */ 126 boolean readBoolean() { 127 return lastByte == TRUE; 128 } 129 130 /** 131 * Reads the data type flag from the stream. 132 * 133 * <p> 134 * This is the byte that indicates what kind of data follows. 135 */ 136 DataType readDataType() throws IOException { 137 int i = read(); 138 if (i == -1) 139 throw ioex("Unexpected end of file found at position {0}", pos); 140 currentDataType = TYPES[i]; 141 switch (currentDataType) { 142 case NULL: 143 case FLOAT: { 144 length = 4; 145 break; 146 } 147 case DOUBLE: { 148 length = 8; 149 break; 150 } 151 case BOOLEAN: { 152 lastByte = i; 153 break; 154 } 155 case INT: { 156 // positive fixnum stores 7-bit positive integer 157 // +--------+ 158 // |0XXXXXXX| 159 // +--------+ 160 // 161 // negative fixnum stores 5-bit negative integer 162 // +--------+ 163 // |111YYYYY| 164 // +--------+ 165 // 166 // * 0XXXXXXX is 8-bit unsigned integer 167 // * 111YYYYY is 8-bit signed integer 168 // 169 // uint 8 stores a 8-bit unsigned integer 170 // +--------+--------+ 171 // | 0xcc |ZZZZZZZZ| 172 // +--------+--------+ 173 // 174 // uint 16 stores a 16-bit big-endian unsigned integer 175 // +--------+--------+--------+ 176 // | 0xcd |ZZZZZZZZ|ZZZZZZZZ| 177 // +--------+--------+--------+ 178 // 179 // uint 32 stores a 32-bit big-endian unsigned integer 180 // +--------+--------+--------+--------+--------+ 181 // | 0xce |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| 182 // +--------+--------+--------+--------+--------+ 183 // 184 // uint 64 stores a 64-bit big-endian unsigned integer 185 // +--------+--------+--------+--------+--------+--------+--------+--------+--------+ 186 // | 0xcf |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| 187 // +--------+--------+--------+--------+--------+--------+--------+--------+--------+ 188 // 189 // int 8 stores a 8-bit signed integer 190 // +--------+--------+ 191 // | 0xd0 |ZZZZZZZZ| 192 // +--------+--------+ 193 // 194 // int 16 stores a 16-bit big-endian signed integer 195 // +--------+--------+--------+ 196 // | 0xd1 |ZZZZZZZZ|ZZZZZZZZ| 197 // +--------+--------+--------+ 198 // 199 // int 32 stores a 32-bit big-endian signed integer 200 // +--------+--------+--------+--------+--------+ 201 // | 0xd2 |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| 202 // +--------+--------+--------+--------+--------+ 203 // 204 // int 64 stores a 64-bit big-endian signed integer 205 // +--------+--------+--------+--------+--------+--------+--------+--------+--------+ 206 // | 0xd3 |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| 207 // +--------+--------+--------+--------+--------+--------+--------+--------+--------+ 208 lastByte = i; 209 if (i <= POSFIXINT_U) 210 length = 0; 211 else if (i >= NEGFIXINT_L) 212 length = -1; 213 else if (i == INT8 || i == UINT8) 214 length = 1; 215 else if (i == INT16 || i == UINT16) 216 length = 2; 217 else if (i == INT32) 218 length = 4; 219 else 220 length = 0; 221 break; 222 } 223 case LONG: { 224 if (i == UINT32) 225 length = 4; 226 else if (i == INT64 || i == UINT64) 227 length = 8; 228 else 229 length = 0; 230 break; 231 } 232 case STRING: { 233 // fixstr stores a byte array whose length is up to 31 bytes: 234 // +--------+========+ 235 // |101XXXXX| data | 236 // +--------+========+ 237 // 238 // str 8 stores a byte array whose length is up to (2^8)-1 bytes: 239 // +--------+--------+========+ 240 // | 0xd9 |YYYYYYYY| data | 241 // +--------+--------+========+ 242 // 243 // str 16 stores a byte array whose length is up to (2^16)-1 bytes: 244 // +--------+--------+--------+========+ 245 // | 0xda |ZZZZZZZZ|ZZZZZZZZ| data | 246 // +--------+--------+--------+========+ 247 // 248 // str 32 stores a byte array whose length is up to (2^32)-1 bytes: 249 // +--------+--------+--------+--------+--------+========+ 250 // | 0xdb |AAAAAAAA|AAAAAAAA|AAAAAAAA|AAAAAAAA| data | 251 // +--------+--------+--------+--------+--------+========+ 252 // 253 // where 254 // * XXXXX is a 5-bit unsigned integer which represents N 255 // * YYYYYYYY is a 8-bit unsigned integer which represents N 256 // * ZZZZZZZZ_ZZZZZZZZ is a 16-bit big-endian unsigned integer which represents N 257 // * AAAAAAAA_AAAAAAAA_AAAAAAAA_AAAAAAAA is a 32-bit big-endian unsigned integer which represents N 258 // * N is the length of data 259 if (i <= FIXSTR_U) 260 length = i & 0x1F; 261 else if (i == STR8) 262 length = readUInt1(); 263 else if (i == STR16) 264 length = readUInt2(); 265 else 266 length = readUInt4(); 267 break; 268 } 269 case ARRAY: { 270 // fixarray stores an array whose length is up to 15 elements: 271 // +--------+~~~~~~~~~~~~~~~~~+ 272 // |1001XXXX| N objects | 273 // +--------+~~~~~~~~~~~~~~~~~+ 274 // 275 // array 16 stores an array whose length is up to (2^16)-1 elements: 276 // +--------+--------+--------+~~~~~~~~~~~~~~~~~+ 277 // | 0xdc |YYYYYYYY|YYYYYYYY| N objects | 278 // +--------+--------+--------+~~~~~~~~~~~~~~~~~+ 279 // 280 // array 32 stores an array whose length is up to (2^32)-1 elements: 281 // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+ 282 // | 0xdd |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| N objects | 283 // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+ 284 // 285 // where 286 // * XXXX is a 4-bit unsigned integer which represents N 287 // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N 288 // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N 289 // N is the size of a array 290 if (i <= FIXARRAY_U) 291 length = i & 0x0F; 292 else if (i == ARRAY16) 293 length = readUInt2(); 294 else 295 length = readUInt4(); 296 break; 297 } 298 case BIN: { 299 // bin 8 stores a byte array whose length is up to (2^8)-1 bytes: 300 // +--------+--------+========+ 301 // | 0xc4 |XXXXXXXX| data | 302 // +--------+--------+========+ 303 // 304 // bin 16 stores a byte array whose length is up to (2^16)-1 bytes: 305 // +--------+--------+--------+========+ 306 // | 0xc5 |YYYYYYYY|YYYYYYYY| data | 307 // +--------+--------+--------+========+ 308 // 309 // bin 32 stores a byte array whose length is up to (2^32)-1 bytes: 310 // +--------+--------+--------+--------+--------+========+ 311 // | 0xc6 |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| data | 312 // +--------+--------+--------+--------+--------+========+ 313 // 314 // where 315 // * XXXXXXXX is a 8-bit unsigned integer which represents N 316 // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N 317 // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N 318 // * N is the length of data 319 if (i == BIN8) 320 length = readUInt1(); 321 else if (i == BIN16) 322 length = readUInt2(); 323 else 324 length = readUInt4(); 325 break; 326 } 327 case EXT: { 328 // fixext 1 stores an integer and a byte array whose length is 1 byte 329 // +--------+--------+--------+ 330 // | 0xd4 | type | data | 331 // +--------+--------+--------+ 332 // 333 // fixext 2 stores an integer and a byte array whose length is 2 bytes 334 // +--------+--------+--------+--------+ 335 // | 0xd5 | type | data | 336 // +--------+--------+--------+--------+ 337 // 338 // fixext 4 stores an integer and a byte array whose length is 4 bytes 339 // +--------+--------+--------+--------+--------+--------+ 340 // | 0xd6 | type | data | 341 // +--------+--------+--------+--------+--------+--------+ 342 // 343 // fixext 8 stores an integer and a byte array whose length is 8 bytes 344 // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ 345 // | 0xd7 | type | data | 346 // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ 347 // 348 // fixext 16 stores an integer and a byte array whose length is 16 bytes 349 // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ 350 // | 0xd8 | type | data 351 // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ 352 // +--------+--------+--------+--------+--------+--------+--------+--------+ 353 // data (cont.) | 354 // +--------+--------+--------+--------+--------+--------+--------+--------+ 355 // 356 // ext 8 stores an integer and a byte array whose length is up to (2^8)-1 bytes: 357 // +--------+--------+--------+========+ 358 // | 0xc7 |XXXXXXXX| type | data | 359 // +--------+--------+--------+========+ 360 // 361 // ext 16 stores an integer and a byte array whose length is up to (2^16)-1 bytes: 362 // +--------+--------+--------+--------+========+ 363 // | 0xc8 |YYYYYYYY|YYYYYYYY| type | data | 364 // +--------+--------+--------+--------+========+ 365 // 366 // ext 32 stores an integer and a byte array whose length is up to (2^32)-1 bytes: 367 // +--------+--------+--------+--------+--------+--------+========+ 368 // | 0xc9 |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| type | data | 369 // +--------+--------+--------+--------+--------+--------+========+ 370 // 371 // where 372 // * XXXXXXXX is a 8-bit unsigned integer which represents N 373 // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N 374 // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a big-endian 32-bit unsigned integer which represents N 375 // * N is a length of data 376 // * type is a signed 8-bit signed integer 377 // * type < 0 is reserved for future extension including 2-byte type information 378 if (i == FIXEXT1) 379 length = 1; 380 else if (i == FIXEXT2) 381 length = 2; 382 else if (i == FIXEXT4) 383 length = 4; 384 else if (i == FIXEXT8) 385 length = 8; 386 else if (i == FIXEXT16) 387 length = 16; 388 else if (i == EXT8) 389 length = readUInt1(); 390 else if (i == EXT16) 391 length = readUInt2(); 392 else if (i == EXT32) 393 length = readUInt4(); 394 extType = read(); 395 396 break; 397 } 398 case MAP: { 399 // fixmap stores a map whose length is up to 15 elements 400 // +--------+~~~~~~~~~~~~~~~~~+ 401 // |1000XXXX| N*2 objects | 402 // +--------+~~~~~~~~~~~~~~~~~+ 403 // 404 // map 16 stores a map whose length is up to (2^16)-1 elements 405 // +--------+--------+--------+~~~~~~~~~~~~~~~~~+ 406 // | 0xde |YYYYYYYY|YYYYYYYY| N*2 objects | 407 // +--------+--------+--------+~~~~~~~~~~~~~~~~~+ 408 // 409 // map 32 stores a map whose length is up to (2^32)-1 elements 410 // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+ 411 // | 0xdf |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| N*2 objects | 412 // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+ 413 // 414 // where 415 // * XXXX is a 4-bit unsigned integer which represents N 416 // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N 417 // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N 418 // * N is the size of a map 419 // * odd elements in objects are keys of a map 420 // * the next element of a key is its associated value 421 if (i <= FIXMAP_U) 422 length = i & 0x0F; 423 else if (i == MAP16) 424 length = readUInt2(); 425 else 426 length = readUInt4(); 427 break; 428 } 429 default: 430 throw ioex("Invalid flag 0xC1 detected in stream."); 431 } 432 return currentDataType; 433 } 434 435 /** 436 * Read a double from the stream. 437 */ 438 double readDouble() throws IOException { 439 return Double.longBitsToDouble(readLong()); 440 } 441 442 /** 443 * Read a float from the stream. 444 */ 445 float readFloat() throws IOException { 446 return Float.intBitsToFloat(readInt()); 447 } 448 449 /** 450 * Read an integer from the stream. 451 */ 452 int readInt() throws IOException { 453 if (length == 0) 454 return lastByte; 455 if (length == 1) 456 return read(); 457 if (length == 2) 458 return (read() << 8) | read(); 459 int i = read(); 460 i <<= 8; 461 i |= read(); 462 i <<= 8; 463 i |= read(); 464 i <<= 8; 465 i |= read(); 466 return i; 467 } 468 469 /** 470 * Returns the length value for the field. 471 * 472 * <p> 473 * For ints/floats/bins/strings, this is the number of bytes that the field takes up (minus the data-type flag). 474 * For arrays, it's the number of array entries. 475 * For maps, it's the number of map entries. 476 */ 477 long readLength() { 478 return length; 479 } 480 481 /** 482 * Read 64-bit long from the stream. 483 */ 484 long readLong() throws IOException { 485 if (length == 4) 486 return readUInt4(); 487 long l = read(); 488 l <<= 8; 489 l |= read(); 490 l <<= 8; 491 l |= read(); 492 l <<= 8; 493 l |= read(); 494 l <<= 8; 495 l |= read(); 496 l <<= 8; 497 l |= read(); 498 l <<= 8; 499 l |= read(); 500 l <<= 8; 501 l |= read(); 502 return l; 503 } 504 505 /** 506 * Read a string from the stream. 507 */ 508 String readString() throws IOException { 509 return new String(readBinary(), UTF8); 510 } 511}