001// ***************************************************************************************************************************
002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
003// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
005// * with the License.  You may obtain a copy of the License at                                                              *
006// *                                                                                                                         *
007// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
008// *                                                                                                                         *
009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
011// * specific language governing permissions and limitations under the License.                                              *
012// ***************************************************************************************************************************
013package org.apache.juneau.msgpack;
014
015import static org.apache.juneau.common.internal.IOUtils.*;
016import static org.apache.juneau.msgpack.DataType.*;
017
018import java.io.*;
019
020import org.apache.juneau.parser.*;
021
022/**
023 * Specialized input stream for parsing MessagePack streams.
024 *
025 * <h5 class='section'>Notes:</h5><ul>
026 *    <li class='note'>
027 *       This class is not intended for external use.
028 * </ul>
029 *
030 * <h5 class='section'>See Also:</h5><ul>
031 *    <li class='link'><a class="doclink" href="../../../../index.html#jm.MsgPackDetails">MessagePack Details</a>
032
033 * </ul>
034 */
035public final class MsgPackInputStream extends ParserInputStream {
036
037   private DataType currentDataType;
038   private long length;
039   private int lastByte;
040   private int extType;
041   int pos = 0;
042
043   // Data type quick-lookup table.
044   private static final DataType[] TYPES = new DataType[] {
045      /*0x0?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
046      /*0x1?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
047      /*0x2?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
048      /*0x3?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
049      /*0x4?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
050      /*0x5?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
051      /*0x6?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
052      /*0x7?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
053      /*0x8?*/ MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,
054      /*0x9?*/ ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,
055      /*0xA?*/ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,
056      /*0xB?*/ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,
057      /*0xC?*/ NULL, INVALID, BOOLEAN, BOOLEAN, BIN, BIN, BIN, EXT, EXT, EXT, FLOAT, DOUBLE, INT, INT, LONG, LONG,
058      /*0xD?*/ INT, INT, INT, LONG, EXT, EXT, EXT, EXT, EXT, STRING, STRING, STRING, ARRAY, ARRAY, MAP, MAP,
059      /*0xE?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
060      /*0xF?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT
061   };
062
063   /**
064    * Constructor.
065    *
066    * @param pipe The parser input.
067    * @throws IOException Thrown by underlying stream.
068    */
069   protected MsgPackInputStream(ParserPipe pipe) throws IOException {
070      super(pipe);
071   }
072
073   /**
074    * Reads the data type flag from the stream.
075    *
076    * <p>
077    * This is the byte that indicates what kind of data follows.
078    */
079   DataType readDataType() throws IOException {
080      int i = read();
081      if (i == -1)
082         throw new IOException("Unexpected end of file found at position "+pos);
083      currentDataType = TYPES[i];
084      switch (currentDataType) {
085         case NULL:
086         case FLOAT: {
087            length = 4;
088            break;
089         }
090         case DOUBLE: {
091            length = 8;
092            break;
093         }
094         case BOOLEAN: {
095            lastByte = i;
096            break;
097         }
098         case INT: {
099            // positive fixnum stores 7-bit positive integer
100            // +--------+
101            // |0XXXXXXX|
102            // +--------+
103            //
104            // negative fixnum stores 5-bit negative integer
105            // +--------+
106            // |111YYYYY|
107            // +--------+
108            //
109            // * 0XXXXXXX is 8-bit unsigned integer
110            // * 111YYYYY is 8-bit signed integer
111            //
112            // uint 8 stores a 8-bit unsigned integer
113            // +--------+--------+
114            // |  0xcc  |ZZZZZZZZ|
115            // +--------+--------+
116            //
117            // uint 16 stores a 16-bit big-endian unsigned integer
118            // +--------+--------+--------+
119            // |  0xcd  |ZZZZZZZZ|ZZZZZZZZ|
120            // +--------+--------+--------+
121            //
122            // uint 32 stores a 32-bit big-endian unsigned integer
123            // +--------+--------+--------+--------+--------+
124            // |  0xce  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
125            // +--------+--------+--------+--------+--------+
126            //
127            // uint 64 stores a 64-bit big-endian unsigned integer
128            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
129            // |  0xcf  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
130            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
131            //
132            // int 8 stores a 8-bit signed integer
133            // +--------+--------+
134            // |  0xd0  |ZZZZZZZZ|
135            // +--------+--------+
136            //
137            // int 16 stores a 16-bit big-endian signed integer
138            // +--------+--------+--------+
139            // |  0xd1  |ZZZZZZZZ|ZZZZZZZZ|
140            // +--------+--------+--------+
141            //
142            // int 32 stores a 32-bit big-endian signed integer
143            // +--------+--------+--------+--------+--------+
144            // |  0xd2  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
145            // +--------+--------+--------+--------+--------+
146            //
147            // int 64 stores a 64-bit big-endian signed integer
148            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
149            // |  0xd3  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
150            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
151            lastByte = i;
152            if (i <= POSFIXINT_U)
153               length = 0;
154            else if (i >= NEGFIXINT_L)
155               length = -1;
156            else if (i == INT8 || i == UINT8)
157               length = 1;
158            else if (i == INT16 || i == UINT16)
159               length = 2;
160            else if (i == INT32)
161               length = 4;
162            else
163               length = 0;
164            break;
165         }
166         case LONG: {
167            if (i == UINT32)
168               length = 4;
169            else if (i == INT64 || i == UINT64)
170               length = 8;
171            else
172               length = 0;
173            break;
174         }
175         case STRING:{
176            // fixstr stores a byte array whose length is up to 31 bytes:
177            // +--------+========+
178            // |101XXXXX|  data  |
179            // +--------+========+
180            //
181            // str 8 stores a byte array whose length is up to (2^8)-1 bytes:
182            // +--------+--------+========+
183            // |  0xd9  |YYYYYYYY|  data  |
184            // +--------+--------+========+
185            //
186            // str 16 stores a byte array whose length is up to (2^16)-1 bytes:
187            // +--------+--------+--------+========+
188            // |  0xda  |ZZZZZZZZ|ZZZZZZZZ|  data  |
189            // +--------+--------+--------+========+
190            //
191            // str 32 stores a byte array whose length is up to (2^32)-1 bytes:
192            // +--------+--------+--------+--------+--------+========+
193            // |  0xdb  |AAAAAAAA|AAAAAAAA|AAAAAAAA|AAAAAAAA|  data  |
194            // +--------+--------+--------+--------+--------+========+
195            //
196            // where
197            // * XXXXX is a 5-bit unsigned integer which represents N
198            // * YYYYYYYY is a 8-bit unsigned integer which represents N
199            // * ZZZZZZZZ_ZZZZZZZZ is a 16-bit big-endian unsigned integer which represents N
200            // * AAAAAAAA_AAAAAAAA_AAAAAAAA_AAAAAAAA is a 32-bit big-endian unsigned integer which represents N
201            // * N is the length of data
202            if (i <= FIXSTR_U)
203               length = i & 0x1F;
204            else if (i == STR8)
205               length = readUInt1();
206            else if (i == STR16)
207               length = readUInt2();
208            else
209               length = readUInt4();
210            break;
211         }
212         case ARRAY: {
213            // fixarray stores an array whose length is up to 15 elements:
214            // +--------+~~~~~~~~~~~~~~~~~+
215            // |1001XXXX|    N objects    |
216            // +--------+~~~~~~~~~~~~~~~~~+
217            //
218            // array 16 stores an array whose length is up to (2^16)-1 elements:
219            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
220            // |  0xdc  |YYYYYYYY|YYYYYYYY|    N objects    |
221            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
222            //
223            // array 32 stores an array whose length is up to (2^32)-1 elements:
224            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
225            // |  0xdd  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|    N objects    |
226            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
227            //
228            // where
229            // * XXXX is a 4-bit unsigned integer which represents N
230            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
231            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
232            //     N is the size of a array
233            if (i <= FIXARRAY_U)
234               length = i & 0x0F;
235            else if (i == ARRAY16)
236               length = readUInt2();
237            else
238               length = readUInt4();
239            break;
240         }
241         case BIN:{
242            // bin 8 stores a byte array whose length is up to (2^8)-1 bytes:
243            // +--------+--------+========+
244            // |  0xc4  |XXXXXXXX|  data  |
245            // +--------+--------+========+
246            //
247            // bin 16 stores a byte array whose length is up to (2^16)-1 bytes:
248            // +--------+--------+--------+========+
249            // |  0xc5  |YYYYYYYY|YYYYYYYY|  data  |
250            // +--------+--------+--------+========+
251            //
252            // bin 32 stores a byte array whose length is up to (2^32)-1 bytes:
253            // +--------+--------+--------+--------+--------+========+
254            // |  0xc6  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|  data  |
255            // +--------+--------+--------+--------+--------+========+
256            //
257            // where
258            // * XXXXXXXX is a 8-bit unsigned integer which represents N
259            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
260            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
261            // * N is the length of data
262            if (i == BIN8)
263               length = readUInt1();
264            else if (i == BIN16)
265               length = readUInt2();
266            else
267               length = readUInt4();
268            break;
269         }
270         case EXT:{
271            // fixext 1 stores an integer and a byte array whose length is 1 byte
272            // +--------+--------+--------+
273            // |  0xd4  |  type  |  data  |
274            // +--------+--------+--------+
275            //
276            // fixext 2 stores an integer and a byte array whose length is 2 bytes
277            // +--------+--------+--------+--------+
278            // |  0xd5  |  type  |       data      |
279            // +--------+--------+--------+--------+
280            //
281            // fixext 4 stores an integer and a byte array whose length is 4 bytes
282            // +--------+--------+--------+--------+--------+--------+
283            // |  0xd6  |  type  |                data               |
284            // +--------+--------+--------+--------+--------+--------+
285            //
286            // fixext 8 stores an integer and a byte array whose length is 8 bytes
287            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
288            // |  0xd7  |  type  |                                  data                                 |
289            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
290            //
291            // fixext 16 stores an integer and a byte array whose length is 16 bytes
292            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
293            // |  0xd8  |  type  |                                  data
294            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
295            // +--------+--------+--------+--------+--------+--------+--------+--------+
296            //                               data (cont.)                              |
297            // +--------+--------+--------+--------+--------+--------+--------+--------+
298            //
299            // ext 8 stores an integer and a byte array whose length is up to (2^8)-1 bytes:
300            // +--------+--------+--------+========+
301            // |  0xc7  |XXXXXXXX|  type  |  data  |
302            // +--------+--------+--------+========+
303            //
304            // ext 16 stores an integer and a byte array whose length is up to (2^16)-1 bytes:
305            // +--------+--------+--------+--------+========+
306            // |  0xc8  |YYYYYYYY|YYYYYYYY|  type  |  data  |
307            // +--------+--------+--------+--------+========+
308            //
309            // ext 32 stores an integer and a byte array whose length is up to (2^32)-1 bytes:
310            // +--------+--------+--------+--------+--------+--------+========+
311            // |  0xc9  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|  type  |  data  |
312            // +--------+--------+--------+--------+--------+--------+========+
313            //
314            // where
315            // * XXXXXXXX is a 8-bit unsigned integer which represents N
316            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
317            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a big-endian 32-bit unsigned integer which represents N
318            // * N is a length of data
319            // * type is a signed 8-bit signed integer
320            // * type < 0 is reserved for future extension including 2-byte type information
321            if (i == FIXEXT1)
322               length = 1;
323            else if (i == FIXEXT2)
324               length = 2;
325            else if (i == FIXEXT4)
326               length = 4;
327            else if (i == FIXEXT8)
328               length = 8;
329            else if (i == FIXEXT16)
330               length = 16;
331            else if (i == EXT8)
332               length = readUInt1();
333            else if (i == EXT16)
334                  length = readUInt2();
335            else if (i == EXT32)
336               length = readUInt4();
337            extType = read();
338
339            break;
340         }
341         case MAP:{
342            // fixmap stores a map whose length is up to 15 elements
343            // +--------+~~~~~~~~~~~~~~~~~+
344            // |1000XXXX|   N*2 objects   |
345            // +--------+~~~~~~~~~~~~~~~~~+
346            //
347            // map 16 stores a map whose length is up to (2^16)-1 elements
348            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
349            // |  0xde  |YYYYYYYY|YYYYYYYY|   N*2 objects   |
350            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
351            //
352            // map 32 stores a map whose length is up to (2^32)-1 elements
353            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
354            // |  0xdf  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|   N*2 objects   |
355            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
356            //
357            // where
358            // * XXXX is a 4-bit unsigned integer which represents N
359            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
360            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
361            // * N is the size of a map
362            // * odd elements in objects are keys of a map
363            // * the next element of a key is its associated value
364            if (i <= FIXMAP_U)
365               length = i & 0x0F;
366            else if (i == MAP16)
367               length = readUInt2();
368            else
369               length = readUInt4();
370            break;
371         }
372         default:
373            throw new IOException("Invalid flag 0xC1 detected in stream.");
374      }
375      return currentDataType;
376   }
377
378   /**
379    * Returns the length value for the field.
380    *
381    * <p>
382    * For ints/floats/bins/strings, this is the number of bytes that the field takes up (minus the data-type flag).
383    * For arrays, it's the number of array entries.
384    * For maps, it's the number of map entries.
385    */
386   long readLength() {
387      return length;
388   }
389
390   /**
391    * Read a boolean from the stream.
392    */
393   boolean readBoolean() {
394      return lastByte == TRUE;
395   }
396
397   /**
398    * Read a string from the stream.
399    */
400   String readString() throws IOException {
401      return new String(readBinary(), UTF8);
402   }
403
404   /**
405    * Read a binary field from the stream.
406    */
407   byte[] readBinary() throws IOException {
408      byte[] b = new byte[(int)length];
409      read(b);
410      return b;
411   }
412
413   /**
414    * Read an integer from the stream.
415    */
416   int readInt() throws IOException {
417      if (length == 0)
418         return lastByte;
419      if (length == 1)
420         return read();
421      if (length == 2)
422         return (read() << 8) | read();
423      int i = read(); i <<= 8; i |= read(); i <<= 8; i |= read(); i <<= 8; i |= read();
424      return i;
425   }
426
427   /**
428    * Read a float from the stream.
429    */
430   float readFloat() throws IOException {
431      return Float.intBitsToFloat(readInt());
432   }
433
434   /**
435    * Read a double from the stream.
436    */
437   double readDouble() throws IOException {
438      return Double.longBitsToDouble(readLong());
439   }
440
441   /**
442    * Read 64-bit long from the stream.
443    */
444   long readLong() throws IOException {
445      if (length == 4)
446         return readUInt4();
447      long l = read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read();
448      return l;
449   }
450
451   /**
452    * Return the extended-format type.
453    * Currently not used.
454    */
455   int getExtType() {
456      return extType;
457   }
458
459   /**
460    * Read one byte from the stream.
461    */
462   private int readUInt1() throws IOException {
463      return read();
464   }
465
466   /**
467    * Read two bytes from the stream.
468    */
469   private int readUInt2() throws IOException {
470      return (read() << 8) | read();
471   }
472
473   /**
474    * Read four bytes from the stream.
475    */
476   private long readUInt4() throws IOException {
477      long l = read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read();
478      return l;
479   }
480}