001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.msgpack;
018
019import static org.apache.juneau.common.utils.IOUtils.*;
020import static org.apache.juneau.msgpack.DataType.*;
021
022import java.io.*;
023
024import org.apache.juneau.parser.*;
025
026/**
027 * Specialized input stream for parsing MessagePack streams.
028 *
029 * <h5 class='section'>Notes:</h5><ul>
030 *    <li class='note'>
031 *       This class is not intended for external use.
032 * </ul>
033 *
034 * <h5 class='section'>See Also:</h5><ul>
035 *    <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/MessagePackBasics">MessagePack Basics</a>
036
037 * </ul>
038 */
039public class MsgPackInputStream extends ParserInputStream {
040
041   private DataType currentDataType;
042   private long length;
043   private int lastByte;
044   private int extType;
045   int pos = 0;
046
047   // Data type quick-lookup table.
048   private static final DataType[] TYPES = {
049      /*0x0?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
050      /*0x1?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
051      /*0x2?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
052      /*0x3?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
053      /*0x4?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
054      /*0x5?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
055      /*0x6?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
056      /*0x7?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
057      /*0x8?*/ MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,
058      /*0x9?*/ ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,
059      /*0xA?*/ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,
060      /*0xB?*/ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,
061      /*0xC?*/ NULL, INVALID, BOOLEAN, BOOLEAN, BIN, BIN, BIN, EXT, EXT, EXT, FLOAT, DOUBLE, INT, INT, LONG, LONG,
062      /*0xD?*/ INT, INT, INT, LONG, EXT, EXT, EXT, EXT, EXT, STRING, STRING, STRING, ARRAY, ARRAY, MAP, MAP,
063      /*0xE?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
064      /*0xF?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT
065   };
066
067   /**
068    * Constructor.
069    *
070    * @param pipe The parser input.
071    * @throws IOException Thrown by underlying stream.
072    */
073   protected MsgPackInputStream(ParserPipe pipe) throws IOException {
074      super(pipe);
075   }
076
077   /**
078    * Reads the data type flag from the stream.
079    *
080    * <p>
081    * This is the byte that indicates what kind of data follows.
082    */
083   DataType readDataType() throws IOException {
084      int i = read();
085      if (i == -1)
086         throw new IOException("Unexpected end of file found at position "+pos);
087      currentDataType = TYPES[i];
088      switch (currentDataType) {
089         case NULL:
090         case FLOAT: {
091            length = 4;
092            break;
093         }
094         case DOUBLE: {
095            length = 8;
096            break;
097         }
098         case BOOLEAN: {
099            lastByte = i;
100            break;
101         }
102         case INT: {
103            // positive fixnum stores 7-bit positive integer
104            // +--------+
105            // |0XXXXXXX|
106            // +--------+
107            //
108            // negative fixnum stores 5-bit negative integer
109            // +--------+
110            // |111YYYYY|
111            // +--------+
112            //
113            // * 0XXXXXXX is 8-bit unsigned integer
114            // * 111YYYYY is 8-bit signed integer
115            //
116            // uint 8 stores a 8-bit unsigned integer
117            // +--------+--------+
118            // |  0xcc  |ZZZZZZZZ|
119            // +--------+--------+
120            //
121            // uint 16 stores a 16-bit big-endian unsigned integer
122            // +--------+--------+--------+
123            // |  0xcd  |ZZZZZZZZ|ZZZZZZZZ|
124            // +--------+--------+--------+
125            //
126            // uint 32 stores a 32-bit big-endian unsigned integer
127            // +--------+--------+--------+--------+--------+
128            // |  0xce  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
129            // +--------+--------+--------+--------+--------+
130            //
131            // uint 64 stores a 64-bit big-endian unsigned integer
132            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
133            // |  0xcf  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
134            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
135            //
136            // int 8 stores a 8-bit signed integer
137            // +--------+--------+
138            // |  0xd0  |ZZZZZZZZ|
139            // +--------+--------+
140            //
141            // int 16 stores a 16-bit big-endian signed integer
142            // +--------+--------+--------+
143            // |  0xd1  |ZZZZZZZZ|ZZZZZZZZ|
144            // +--------+--------+--------+
145            //
146            // int 32 stores a 32-bit big-endian signed integer
147            // +--------+--------+--------+--------+--------+
148            // |  0xd2  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
149            // +--------+--------+--------+--------+--------+
150            //
151            // int 64 stores a 64-bit big-endian signed integer
152            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
153            // |  0xd3  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
154            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
155            lastByte = i;
156            if (i <= POSFIXINT_U)
157               length = 0;
158            else if (i >= NEGFIXINT_L)
159               length = -1;
160            else if (i == INT8 || i == UINT8)
161               length = 1;
162            else if (i == INT16 || i == UINT16)
163               length = 2;
164            else if (i == INT32)
165               length = 4;
166            else
167               length = 0;
168            break;
169         }
170         case LONG: {
171            if (i == UINT32)
172               length = 4;
173            else if (i == INT64 || i == UINT64)
174               length = 8;
175            else
176               length = 0;
177            break;
178         }
179         case STRING:{
180            // fixstr stores a byte array whose length is up to 31 bytes:
181            // +--------+========+
182            // |101XXXXX|  data  |
183            // +--------+========+
184            //
185            // str 8 stores a byte array whose length is up to (2^8)-1 bytes:
186            // +--------+--------+========+
187            // |  0xd9  |YYYYYYYY|  data  |
188            // +--------+--------+========+
189            //
190            // str 16 stores a byte array whose length is up to (2^16)-1 bytes:
191            // +--------+--------+--------+========+
192            // |  0xda  |ZZZZZZZZ|ZZZZZZZZ|  data  |
193            // +--------+--------+--------+========+
194            //
195            // str 32 stores a byte array whose length is up to (2^32)-1 bytes:
196            // +--------+--------+--------+--------+--------+========+
197            // |  0xdb  |AAAAAAAA|AAAAAAAA|AAAAAAAA|AAAAAAAA|  data  |
198            // +--------+--------+--------+--------+--------+========+
199            //
200            // where
201            // * XXXXX is a 5-bit unsigned integer which represents N
202            // * YYYYYYYY is a 8-bit unsigned integer which represents N
203            // * ZZZZZZZZ_ZZZZZZZZ is a 16-bit big-endian unsigned integer which represents N
204            // * AAAAAAAA_AAAAAAAA_AAAAAAAA_AAAAAAAA is a 32-bit big-endian unsigned integer which represents N
205            // * N is the length of data
206            if (i <= FIXSTR_U)
207               length = i & 0x1F;
208            else if (i == STR8)
209               length = readUInt1();
210            else if (i == STR16)
211               length = readUInt2();
212            else
213               length = readUInt4();
214            break;
215         }
216         case ARRAY: {
217            // fixarray stores an array whose length is up to 15 elements:
218            // +--------+~~~~~~~~~~~~~~~~~+
219            // |1001XXXX|    N objects    |
220            // +--------+~~~~~~~~~~~~~~~~~+
221            //
222            // array 16 stores an array whose length is up to (2^16)-1 elements:
223            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
224            // |  0xdc  |YYYYYYYY|YYYYYYYY|    N objects    |
225            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
226            //
227            // array 32 stores an array whose length is up to (2^32)-1 elements:
228            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
229            // |  0xdd  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|    N objects    |
230            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
231            //
232            // where
233            // * XXXX is a 4-bit unsigned integer which represents N
234            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
235            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
236            //     N is the size of a array
237            if (i <= FIXARRAY_U)
238               length = i & 0x0F;
239            else if (i == ARRAY16)
240               length = readUInt2();
241            else
242               length = readUInt4();
243            break;
244         }
245         case BIN:{
246            // bin 8 stores a byte array whose length is up to (2^8)-1 bytes:
247            // +--------+--------+========+
248            // |  0xc4  |XXXXXXXX|  data  |
249            // +--------+--------+========+
250            //
251            // bin 16 stores a byte array whose length is up to (2^16)-1 bytes:
252            // +--------+--------+--------+========+
253            // |  0xc5  |YYYYYYYY|YYYYYYYY|  data  |
254            // +--------+--------+--------+========+
255            //
256            // bin 32 stores a byte array whose length is up to (2^32)-1 bytes:
257            // +--------+--------+--------+--------+--------+========+
258            // |  0xc6  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|  data  |
259            // +--------+--------+--------+--------+--------+========+
260            //
261            // where
262            // * XXXXXXXX is a 8-bit unsigned integer which represents N
263            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
264            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
265            // * N is the length of data
266            if (i == BIN8)
267               length = readUInt1();
268            else if (i == BIN16)
269               length = readUInt2();
270            else
271               length = readUInt4();
272            break;
273         }
274         case EXT:{
275            // fixext 1 stores an integer and a byte array whose length is 1 byte
276            // +--------+--------+--------+
277            // |  0xd4  |  type  |  data  |
278            // +--------+--------+--------+
279            //
280            // fixext 2 stores an integer and a byte array whose length is 2 bytes
281            // +--------+--------+--------+--------+
282            // |  0xd5  |  type  |       data      |
283            // +--------+--------+--------+--------+
284            //
285            // fixext 4 stores an integer and a byte array whose length is 4 bytes
286            // +--------+--------+--------+--------+--------+--------+
287            // |  0xd6  |  type  |                data               |
288            // +--------+--------+--------+--------+--------+--------+
289            //
290            // fixext 8 stores an integer and a byte array whose length is 8 bytes
291            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
292            // |  0xd7  |  type  |                                  data                                 |
293            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
294            //
295            // fixext 16 stores an integer and a byte array whose length is 16 bytes
296            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
297            // |  0xd8  |  type  |                                  data
298            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
299            // +--------+--------+--------+--------+--------+--------+--------+--------+
300            //                               data (cont.)                              |
301            // +--------+--------+--------+--------+--------+--------+--------+--------+
302            //
303            // ext 8 stores an integer and a byte array whose length is up to (2^8)-1 bytes:
304            // +--------+--------+--------+========+
305            // |  0xc7  |XXXXXXXX|  type  |  data  |
306            // +--------+--------+--------+========+
307            //
308            // ext 16 stores an integer and a byte array whose length is up to (2^16)-1 bytes:
309            // +--------+--------+--------+--------+========+
310            // |  0xc8  |YYYYYYYY|YYYYYYYY|  type  |  data  |
311            // +--------+--------+--------+--------+========+
312            //
313            // ext 32 stores an integer and a byte array whose length is up to (2^32)-1 bytes:
314            // +--------+--------+--------+--------+--------+--------+========+
315            // |  0xc9  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|  type  |  data  |
316            // +--------+--------+--------+--------+--------+--------+========+
317            //
318            // where
319            // * XXXXXXXX is a 8-bit unsigned integer which represents N
320            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
321            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a big-endian 32-bit unsigned integer which represents N
322            // * N is a length of data
323            // * type is a signed 8-bit signed integer
324            // * type < 0 is reserved for future extension including 2-byte type information
325            if (i == FIXEXT1)
326               length = 1;
327            else if (i == FIXEXT2)
328               length = 2;
329            else if (i == FIXEXT4)
330               length = 4;
331            else if (i == FIXEXT8)
332               length = 8;
333            else if (i == FIXEXT16)
334               length = 16;
335            else if (i == EXT8)
336               length = readUInt1();
337            else if (i == EXT16)
338                  length = readUInt2();
339            else if (i == EXT32)
340               length = readUInt4();
341            extType = read();
342
343            break;
344         }
345         case MAP:{
346            // fixmap stores a map whose length is up to 15 elements
347            // +--------+~~~~~~~~~~~~~~~~~+
348            // |1000XXXX|   N*2 objects   |
349            // +--------+~~~~~~~~~~~~~~~~~+
350            //
351            // map 16 stores a map whose length is up to (2^16)-1 elements
352            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
353            // |  0xde  |YYYYYYYY|YYYYYYYY|   N*2 objects   |
354            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
355            //
356            // map 32 stores a map whose length is up to (2^32)-1 elements
357            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
358            // |  0xdf  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|   N*2 objects   |
359            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
360            //
361            // where
362            // * XXXX is a 4-bit unsigned integer which represents N
363            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
364            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
365            // * N is the size of a map
366            // * odd elements in objects are keys of a map
367            // * the next element of a key is its associated value
368            if (i <= FIXMAP_U)
369               length = i & 0x0F;
370            else if (i == MAP16)
371               length = readUInt2();
372            else
373               length = readUInt4();
374            break;
375         }
376         default:
377            throw new IOException("Invalid flag 0xC1 detected in stream.");
378      }
379      return currentDataType;
380   }
381
382   /**
383    * Returns the length value for the field.
384    *
385    * <p>
386    * For ints/floats/bins/strings, this is the number of bytes that the field takes up (minus the data-type flag).
387    * For arrays, it's the number of array entries.
388    * For maps, it's the number of map entries.
389    */
390   long readLength() {
391      return length;
392   }
393
394   /**
395    * Read a boolean from the stream.
396    */
397   boolean readBoolean() {
398      return lastByte == TRUE;
399   }
400
401   /**
402    * Read a string from the stream.
403    */
404   String readString() throws IOException {
405      return new String(readBinary(), UTF8);
406   }
407
408   /**
409    * Read a binary field from the stream.
410    */
411   byte[] readBinary() throws IOException {
412      byte[] b = new byte[(int)length];
413      read(b);
414      return b;
415   }
416
417   /**
418    * Read an integer from the stream.
419    */
420   int readInt() throws IOException {
421      if (length == 0)
422         return lastByte;
423      if (length == 1)
424         return read();
425      if (length == 2)
426         return (read() << 8) | read();
427      int i = read(); i <<= 8; i |= read(); i <<= 8; i |= read(); i <<= 8; i |= read();
428      return i;
429   }
430
431   /**
432    * Read a float from the stream.
433    */
434   float readFloat() throws IOException {
435      return Float.intBitsToFloat(readInt());
436   }
437
438   /**
439    * Read a double from the stream.
440    */
441   double readDouble() throws IOException {
442      return Double.longBitsToDouble(readLong());
443   }
444
445   /**
446    * Read 64-bit long from the stream.
447    */
448   long readLong() throws IOException {
449      if (length == 4)
450         return readUInt4();
451      long l = read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read();
452      return l;
453   }
454
455   /**
456    * Return the extended-format type.
457    * Currently not used.
458    */
459   int getExtType() {
460      return extType;
461   }
462
463   /**
464    * Read one byte from the stream.
465    */
466   private int readUInt1() throws IOException {
467      return read();
468   }
469
470   /**
471    * Read two bytes from the stream.
472    */
473   private int readUInt2() throws IOException {
474      return (read() << 8) | read();
475   }
476
477   /**
478    * Read four bytes from the stream.
479    */
480   private long readUInt4() throws IOException {
481      long l = read(); l <<= 8; l |= read(); l <<= 8; l |= read(); l <<= 8; l |= read();
482      return l;
483   }
484}