001// ***************************************************************************************************************************
002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
003// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
005// * with the License.  You may obtain a copy of the License at                                                              *
006// *                                                                                                                         *
007// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
008// *                                                                                                                         *
009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
011// * specific language governing permissions and limitations under the License.                                              *
012// ***************************************************************************************************************************
013package org.apache.juneau.msgpack;
014
015import static org.apache.juneau.internal.IOUtils.*;
016import static org.apache.juneau.msgpack.DataType.*;
017
018import java.io.*;
019
020import org.apache.juneau.parser.*;
021
022/**
023 * Specialized input stream for parsing MessagePack streams.
024 *
025 * <ul class='notes'>
026 *    <li>
027 *       This class is not intended for external use.
028 * </ul>
029 */
030public final class MsgPackInputStream extends ParserInputStream {
031
032   private DataType currentDataType;
033   private long length;
034   private int lastByte;
035   private int extType;
036   int pos = 0;
037
038   // Data type quick-lookup table.
039   private static final DataType[] TYPES = new DataType[] {
040      /*0x0?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
041      /*0x1?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
042      /*0x2?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
043      /*0x3?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
044      /*0x4?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
045      /*0x5?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
046      /*0x6?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
047      /*0x7?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
048      /*0x8?*/ MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,
049      /*0x9?*/ ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,
050      /*0xA?*/ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,
051      /*0xB?*/ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,
052      /*0xC?*/ NULL, INVALID, BOOLEAN, BOOLEAN, BIN, BIN, BIN, EXT, EXT, EXT, FLOAT, DOUBLE, INT, INT, LONG, LONG,
053      /*0xD?*/ INT, INT, INT, LONG, EXT, EXT, EXT, EXT, EXT, STRING, STRING, STRING, ARRAY, ARRAY, MAP, MAP,
054      /*0xE?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
055      /*0xF?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT
056   };
057
058   /**
059    * Constructor.
060    *
061    * @param pipe The parser input.
062    * @throws IOException Thrown by underlying stream.
063    */
064   protected MsgPackInputStream(ParserPipe pipe) throws IOException {
065      super(pipe);
066   }
067
068   /**
069    * Reads the data type flag from the stream.
070    *
071    * <p>
072    * This is the byte that indicates what kind of data follows.
073    */
074   DataType readDataType() throws IOException {
075      int i = read();
076      if (i == -1)
077         throw new IOException("Unexpected end of file found at position " + pos);
078      currentDataType = TYPES[i];
079      switch (currentDataType) {
080         case NULL:
081         case FLOAT: {
082            length = 4;
083            break;
084         }
085         case DOUBLE: {
086            length = 8;
087            break;
088         }
089         case BOOLEAN: {
090            lastByte = i;
091            break;
092         }
093         case INT: {
094            // positive fixnum stores 7-bit positive integer
095            // +--------+
096            // |0XXXXXXX|
097            // +--------+
098            //
099            // negative fixnum stores 5-bit negative integer
100            // +--------+
101            // |111YYYYY|
102            // +--------+
103            //
104            // * 0XXXXXXX is 8-bit unsigned integer
105            // * 111YYYYY is 8-bit signed integer
106            //
107            // uint 8 stores a 8-bit unsigned integer
108            // +--------+--------+
109            // |  0xcc  |ZZZZZZZZ|
110            // +--------+--------+
111            //
112            // uint 16 stores a 16-bit big-endian unsigned integer
113            // +--------+--------+--------+
114            // |  0xcd  |ZZZZZZZZ|ZZZZZZZZ|
115            // +--------+--------+--------+
116            //
117            // uint 32 stores a 32-bit big-endian unsigned integer
118            // +--------+--------+--------+--------+--------+
119            // |  0xce  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
120            // +--------+--------+--------+--------+--------+
121            //
122            // uint 64 stores a 64-bit big-endian unsigned integer
123            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
124            // |  0xcf  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
125            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
126            //
127            // int 8 stores a 8-bit signed integer
128            // +--------+--------+
129            // |  0xd0  |ZZZZZZZZ|
130            // +--------+--------+
131            //
132            // int 16 stores a 16-bit big-endian signed integer
133            // +--------+--------+--------+
134            // |  0xd1  |ZZZZZZZZ|ZZZZZZZZ|
135            // +--------+--------+--------+
136            //
137            // int 32 stores a 32-bit big-endian signed integer
138            // +--------+--------+--------+--------+--------+
139            // |  0xd2  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
140            // +--------+--------+--------+--------+--------+
141            //
142            // int 64 stores a 64-bit big-endian signed integer
143            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
144            // |  0xd3  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
145            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
146            lastByte = i;
147            if (i <= POSFIXINT_U)
148               length = 0;
149            else if (i >= NEGFIXINT_L)
150               length = -1;
151            else if (i == INT8 || i == UINT8)
152               length = 1;
153            else if (i == INT16 || i == UINT16)
154               length = 2;
155            else if (i == INT32)
156               length = 4;
157            else
158               length = 0;
159            break;
160         }
161         case LONG: {
162            if (i == UINT32)
163               length = 4;
164            else if (i == INT64 || i == UINT64)
165               length = 8;
166            else
167               length = 0;
168            break;
169         }
170         case STRING:{
171            // fixstr stores a byte array whose length is up to 31 bytes:
172            // +--------+========+
173            // |101XXXXX|  data  |
174            // +--------+========+
175            //
176            // str 8 stores a byte array whose length is up to (2^8)-1 bytes:
177            // +--------+--------+========+
178            // |  0xd9  |YYYYYYYY|  data  |
179            // +--------+--------+========+
180            //
181            // str 16 stores a byte array whose length is up to (2^16)-1 bytes:
182            // +--------+--------+--------+========+
183            // |  0xda  |ZZZZZZZZ|ZZZZZZZZ|  data  |
184            // +--------+--------+--------+========+
185            //
186            // str 32 stores a byte array whose length is up to (2^32)-1 bytes:
187            // +--------+--------+--------+--------+--------+========+
188            // |  0xdb  |AAAAAAAA|AAAAAAAA|AAAAAAAA|AAAAAAAA|  data  |
189            // +--------+--------+--------+--------+--------+========+
190            //
191            // where
192            // * XXXXX is a 5-bit unsigned integer which represents N
193            // * YYYYYYYY is a 8-bit unsigned integer which represents N
194            // * ZZZZZZZZ_ZZZZZZZZ is a 16-bit big-endian unsigned integer which represents N
195            // * AAAAAAAA_AAAAAAAA_AAAAAAAA_AAAAAAAA is a 32-bit big-endian unsigned integer which represents N
196            // * N is the length of data
197            if (i <= FIXSTR_U)
198               length = i & 0x1F;
199            else if (i == STR8)
200               length = readUInt1();
201            else if (i == STR16)
202               length = readUInt2();
203            else
204               length = readUInt4();
205            break;
206         }
207         case ARRAY: {
208            // fixarray stores an array whose length is up to 15 elements:
209            // +--------+~~~~~~~~~~~~~~~~~+
210            // |1001XXXX|    N objects    |
211            // +--------+~~~~~~~~~~~~~~~~~+
212            //
213            // array 16 stores an array whose length is up to (2^16)-1 elements:
214            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
215            // |  0xdc  |YYYYYYYY|YYYYYYYY|    N objects    |
216            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
217            //
218            // array 32 stores an array whose length is up to (2^32)-1 elements:
219            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
220            // |  0xdd  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|    N objects    |
221            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
222            //
223            // where
224            // * XXXX is a 4-bit unsigned integer which represents N
225            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
226            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
227            //     N is the size of a array
228            if (i <= FIXARRAY_U)
229               length = i & 0x0F;
230            else if (i == ARRAY16)
231               length = readUInt2();
232            else
233               length = readUInt4();
234            break;
235         }
236         case BIN:{
237            // bin 8 stores a byte array whose length is up to (2^8)-1 bytes:
238            // +--------+--------+========+
239            // |  0xc4  |XXXXXXXX|  data  |
240            // +--------+--------+========+
241            //
242            // bin 16 stores a byte array whose length is up to (2^16)-1 bytes:
243            // +--------+--------+--------+========+
244            // |  0xc5  |YYYYYYYY|YYYYYYYY|  data  |
245            // +--------+--------+--------+========+
246            //
247            // bin 32 stores a byte array whose length is up to (2^32)-1 bytes:
248            // +--------+--------+--------+--------+--------+========+
249            // |  0xc6  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|  data  |
250            // +--------+--------+--------+--------+--------+========+
251            //
252            // where
253            // * XXXXXXXX is a 8-bit unsigned integer which represents N
254            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
255            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
256            // * N is the length of data
257            if (i == BIN8)
258               length = readUInt1();
259            else if (i == BIN16)
260               length = readUInt2();
261            else
262               length = readUInt4();
263            break;
264         }
265         case EXT:{
266            // fixext 1 stores an integer and a byte array whose length is 1 byte
267            // +--------+--------+--------+
268            // |  0xd4  |  type  |  data  |
269            // +--------+--------+--------+
270            //
271            // fixext 2 stores an integer and a byte array whose length is 2 bytes
272            // +--------+--------+--------+--------+
273            // |  0xd5  |  type  |       data      |
274            // +--------+--------+--------+--------+
275            //
276            // fixext 4 stores an integer and a byte array whose length is 4 bytes
277            // +--------+--------+--------+--------+--------+--------+
278            // |  0xd6  |  type  |                data               |
279            // +--------+--------+--------+--------+--------+--------+
280            //
281            // fixext 8 stores an integer and a byte array whose length is 8 bytes
282            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
283            // |  0xd7  |  type  |                                  data                                 |
284            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
285            //
286            // fixext 16 stores an integer and a byte array whose length is 16 bytes
287            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
288            // |  0xd8  |  type  |                                  data
289            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
290            // +--------+--------+--------+--------+--------+--------+--------+--------+
291            //                               data (cont.)                              |
292            // +--------+--------+--------+--------+--------+--------+--------+--------+
293            //
294            // ext 8 stores an integer and a byte array whose length is up to (2^8)-1 bytes:
295            // +--------+--------+--------+========+
296            // |  0xc7  |XXXXXXXX|  type  |  data  |
297            // +--------+--------+--------+========+
298            //
299            // ext 16 stores an integer and a byte array whose length is up to (2^16)-1 bytes:
300            // +--------+--------+--------+--------+========+
301            // |  0xc8  |YYYYYYYY|YYYYYYYY|  type  |  data  |
302            // +--------+--------+--------+--------+========+
303            //
304            // ext 32 stores an integer and a byte array whose length is up to (2^32)-1 bytes:
305            // +--------+--------+--------+--------+--------+--------+========+
306            // |  0xc9  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|  type  |  data  |
307            // +--------+--------+--------+--------+--------+--------+========+
308            //
309            // where
310            // * XXXXXXXX is a 8-bit unsigned integer which represents N
311            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
312            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a big-endian 32-bit unsigned integer which represents N
313            // * N is a length of data
314            // * type is a signed 8-bit signed integer
315            // * type < 0 is reserved for future extension including 2-byte type information
316            if (i == FIXEXT1)
317               length = 1;
318            else if (i == FIXEXT2)
319               length = 2;
320            else if (i == FIXEXT4)
321               length = 4;
322            else if (i == FIXEXT8)
323               length = 8;
324            else if (i == FIXEXT16)
325               length = 16;
326            else if (i == EXT8)
327               length = readUInt1();
328            else if (i == EXT16)
329                  length = readUInt2();
330            else if (i == EXT32)
331               length = readUInt4();
332            extType = read();
333
334            break;
335         }
336         case MAP:{
337            // fixmap stores a map whose length is up to 15 elements
338            // +--------+~~~~~~~~~~~~~~~~~+
339            // |1000XXXX|   N*2 objects   |
340            // +--------+~~~~~~~~~~~~~~~~~+
341            //
342            // map 16 stores a map whose length is up to (2^16)-1 elements
343            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
344            // |  0xde  |YYYYYYYY|YYYYYYYY|   N*2 objects   |
345            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
346            //
347            // map 32 stores a map whose length is up to (2^32)-1 elements
348            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
349            // |  0xdf  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|   N*2 objects   |
350            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
351            //
352            // where
353            // * XXXX is a 4-bit unsigned integer which represents N
354            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
355            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
356            // * N is the size of a map
357            // * odd elements in objects are keys of a map
358            // * the next element of a key is its associated value
359            if (i <= FIXMAP_U)
360               length = i & 0x0F;
361            else if (i == MAP16)
362               length = readUInt2();
363            else
364               length = readUInt4();
365            break;
366         }
367         default:
368            throw new IOException("Invalid flag 0xC1 detected in stream.");
369      }
370      return currentDataType;
371   }
372
373   /**
374    * Returns the length value for the field.
375    *
376    * <p>
377    * For ints/floats/bins/strings, this is the number of bytes that the field takes up (minus the data-type flag).
378    * For arrays, it's the number of array entries.
379    * For maps, it's the number of map entries.
380    */
381   long readLength() {
382      return length;
383   }
384
385   /**
386    * Read a boolean from the stream.
387    */
388   boolean readBoolean() {
389      return lastByte == TRUE;
390   }
391
392   /**
393    * Read a string from the stream.
394    */
395   String readString() throws IOException {
396      return new String(readBinary(), UTF8);
397   }
398
399   /**
400    * Read a binary field from the stream.
401    */
402   byte[] readBinary() throws IOException {
403      byte[] b = new byte[(int)length];
404      read(b);
405      return b;
406   }
407
408   /**
409    * Read an integer from the stream.
410    */
411   int readInt() throws IOException {
412      if (length == 0)
413         return lastByte;
414      if (length == 1)
415         return read();
416      if (length == 2)
417         return (read() << 8) | read();
418      int i = read(); i <<= 8; i |= read(); i <<= 8; i |= read(); i <<= 8; i |= read();
419      return i;
420   }
421
422   /**
423    * Read a float from the stream.
424    */
425   float readFloat() throws IOException {
426      return Float.intBitsToFloat(readInt());
427   }
428
429   /**
430    * Read a double from the stream.
431    */
432   double readDouble() throws IOException {
433      return Double.longBitsToDouble(readLong());
434   }
435
436   /**
437    * Read 64-bit long from the stream.
438    */
439   long readLong() throws IOException {
440      if (length == 4)
441         return readUInt4();
442      long l = read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read();
443      return l;
444   }
445
446   /**
447    * Return the extended-format type.
448    * Currently not used.
449    */
450   int getExtType() {
451      return extType;
452   }
453
454   /**
455    * Read one byte from the stream.
456    */
457   private int readUInt1() throws IOException {
458      return read();
459   }
460
461   /**
462    * Read two bytes from the stream.
463    */
464   private int readUInt2() throws IOException {
465      return (read() << 8) | read();
466   }
467
468   /**
469    * Read four bytes from the stream.
470    */
471   private long readUInt4() throws IOException {
472      long l = read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read();
473      return l;
474   }
475}