View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.juneau.junit.bct;
18  
19  import static org.apache.juneau.commons.utils.CollectionUtils.*;
20  import static org.apache.juneau.junit.bct.BctAssertions.*;
21  import static org.apache.juneau.junit.bct.NestedTokenizer.*;
22  import static org.junit.jupiter.api.Assertions.*;
23  
24  import org.apache.juneau.*;
25  import org.junit.jupiter.api.*;
26  
/**
 * Comprehensive unit tests for the {@link NestedTokenizer} class.
 *
 * <p>Tests cover all aspects of the state machine parser, including:</p>
 * <ul>
 *    <li>Simple token parsing</li>
 *    <li>Nested token structures</li>
 *    <li>Escape sequence handling</li>
 *    <li>Deep nesting scenarios</li>
 *    <li>Edge cases and error conditions</li>
 *    <li>Token object functionality</li>
 * </ul>
 */
40  class NestedTokenizer_Test extends TestBase {
41  
	//------------------------------------------------------------------------------------------------------------------
	// Basic tokenization tests
	//------------------------------------------------------------------------------------------------------------------
45  
46  	@Test void a01_simpleTokens() {
47  		new NestedTokenizer();
48  
49  		// Single token
50  		var tokens = tokenize("foo");
51  		assertList(tokens, token("foo"));
52  
53  		// Multiple tokens
54  		tokens = tokenize("foo,bar,baz");
55  		assertList(tokens, token("foo"), token("bar"), token("baz"));
56  
57  		// Tokens with whitespace
58  		tokens = tokenize("  foo  ,  bar  ,  baz  ");
59  		assertList(tokens, token("foo"), token("bar"), token("baz"));
60  	}
61  
62  	@Test void a02_nestedTokens() {
63  		// Simple nested structure
64  		var tokens = tokenize("foo{a,b}");
65  		assertSize(1, tokens);
66  		assertToken(tokens.get(0), "foo", "a", "b");
67  
68  		// Multiple tokens with nesting
69  		tokens = tokenize("foo{a,b},bar{c,d}");
70  		assertSize(2, tokens);
71  		assertToken(tokens.get(0), "foo", "a", "b");
72  		assertToken(tokens.get(1), "bar", "c", "d");
73  
74  		// Empty nested content
75  		tokens = tokenize("foo{}");
76  		assertSize(1, tokens);
77  		assertToken(tokens.get(0), "foo");
78  	}
79  
80  	@Test void a03_deepNesting() {
81  		// Two levels deep
82  		var tokens = tokenize("root{level1{a,b},level2}");
83  		assertSize(1, tokens);
84  		var root = tokens.get(0);
85  	assertEquals("root", root.getValue());
86  	assertSize(2, root.getNested());
87  	assertToken(root.getNested().get(0), "level1", "a", "b");
88  		assertToken(root.getNested().get(1), "level2");
89  
90  		// Three levels deep
91  		tokens = tokenize("root{level1{level2{a,b}}}");
92  		assertSize(1, tokens);
93  		root = tokens.get(0);
94  	assertEquals("root", root.getValue());
95  	assertSize(1, root.getNested());
96  	var level1 = root.getNested().get(0);
97  	assertEquals("level1", level1.getValue());
98  	assertSize(1, level1.getNested());
99  	assertToken(level1.getNested().get(0), "level2", "a", "b");
100 	}
101 
102 	@Test void a04_escapeSequences() {
103 		// Escaped comma
104 		var tokens = tokenize("foo\\,bar");
105 		assertList(tokens, token("foo,bar"));
106 
107 		// Escaped braces
108 		tokens = tokenize("foo\\{bar\\}");
109 		assertList(tokens, token("foo{bar}"));
110 
111 		// Escaped backslash
112 		tokens = tokenize("foo\\\\bar");
113 		assertList(tokens, token("foo\\bar"));
114 
115 		// Multiple escapes
116 		tokens = tokenize("foo\\,bar\\{baz\\}");
117 		assertList(tokens, token("foo,bar{baz}"));
118 
119 		// Escape in nested content
120 		tokens = tokenize("root{foo\\,bar,baz}");
121 		assertSize(1, tokens);
122 		assertToken(tokens.get(0), "root", "foo,bar", "baz");
123 	}
124 
	//------------------------------------------------------------------------------------------------------------------
	// Complex scenarios
	//------------------------------------------------------------------------------------------------------------------
128 
129 	@Test void b01_complexNestedStructures() {
130 		// Real-world example: user configuration
131 		var tokens = tokenize("user{name,email,address{street,city,zipcode{main,plus4}}},config{timeout,retries}");
132 		assertSize(2, tokens);
133 
134 	// Validate user token
135 	var user = tokens.get(0);
136 	assertEquals("user", user.getValue());
137 	assertSize(3, user.getNested());
138 	assertEquals("name", user.getNested().get(0).getValue());
139 	assertEquals("email", user.getNested().get(1).getValue());
140 
141 	var address = user.getNested().get(2);
142 	assertEquals("address", address.getValue());
143 	assertSize(3, address.getNested());
144 	assertEquals("street", address.getNested().get(0).getValue());
145 	assertEquals("city", address.getNested().get(1).getValue());
146 
147 	var zipcode = address.getNested().get(2);
148 	assertEquals("zipcode", zipcode.getValue());
149 	assertSize(2, zipcode.getNested());
150 	assertEquals("main", zipcode.getNested().get(0).getValue());
151 	assertEquals("plus4", zipcode.getNested().get(1).getValue());
152 
153 		// Validate config token
154 		var config = tokens.get(1);
155 		assertToken(config, "config", "timeout", "retries");
156 	}
157 
158 	@Test void b02_mixedEscapingAndNesting() {
159 		// Escaped characters within nested structures
160 		var tokens = tokenize("data{key\\,name,value\\{test\\}},info{desc\\,important}");
161 		assertSize(2, tokens);
162 
163 		assertToken(tokens.get(0), "data", "key,name", "value{test}");
164 		assertToken(tokens.get(1), "info", "desc,important");
165 	}
166 
167 	@Test void b03_extremeNesting() {
168 		// Very deep nesting
169 		var tokens = tokenize("l1{l2{l3{l4{l5{value}}}}}");
170 		assertSize(1, tokens);
171 
172 		var current = tokens.get(0);
173 	for (var i = 1; i <= 5; i++) {
174 		assertEquals("l" + i, current.getValue());
175 		assertSize(1, current.getNested());
176 		current = current.getNested().get(0);
177 	}
178 		assertEquals("value", current.getValue());
179 		assertFalse(current.hasNested());
180 	}
181 
	//------------------------------------------------------------------------------------------------------------------
	// Edge cases and error conditions
	//------------------------------------------------------------------------------------------------------------------
185 
186 	@Test void c01_edgeCases() {
187 		// Single character
188 		var tokens = tokenize("a");
189 		assertList(tokens, token("a"));
190 
191 		// Just comma
192 		tokens = tokenize(",");
193 		assertSize(2, tokens);
194 		assertEquals("", tokens.get(0).getValue());
195 		assertEquals("", tokens.get(1).getValue());
196 
197 		// Multiple commas
198 		tokens = tokenize("a,,b");
199 		assertSize(3, tokens);
200 		assertEquals("a", tokens.get(0).getValue());
201 		assertEquals("", tokens.get(1).getValue());
202 		assertEquals("b", tokens.get(2).getValue());
203 
204 		// Trailing comma
205 		tokens = tokenize("a,b,");
206 		assertSize(3, tokens);
207 		assertEquals("a", tokens.get(0).getValue());
208 		assertEquals("b", tokens.get(1).getValue());
209 		assertEquals("", tokens.get(2).getValue());
210 
211 		// Leading comma
212 		tokens = tokenize(",a,b");
213 		assertSize(3, tokens);
214 		assertEquals("", tokens.get(0).getValue());
215 		assertEquals("a", tokens.get(1).getValue());
216 		assertEquals("b", tokens.get(2).getValue());
217 	}
218 
219 	@Test void c02_whitespaceHandling() {
220 		// Various whitespace scenarios
221 		var tokens = tokenize("  a  ,  b  ");
222 		assertList(tokens, token("a"), token("b"));
223 
224 		// Tabs and newlines
225 		tokens = tokenize("\ta\t,\nb\n");
226 		assertList(tokens, token("a"), token("b"));
227 
228 		// Whitespace in nested content
229 		tokens = tokenize("root{  a  ,  b  }");
230 		assertToken(tokens.get(0), "root", "a", "b");
231 
232 		// Whitespace around braces
233 		tokens = tokenize("root  {  a,b  }  ,  other");
234 		assertSize(2, tokens);
235 		assertToken(tokens.get(0), "root", "a", "b");
236 		assertToken(tokens.get(1), "other");
237 	}
238 
239 	@Test void c03_errorConditions() {
240 		// Null input
241 		assertThrows(IllegalArgumentException.class, () -> tokenize(null));
242 
243 		// Empty input
244 		assertThrows(IllegalArgumentException.class, () -> tokenize(""));
245 
246 		// Blank input
247 		assertThrows(IllegalArgumentException.class, () -> tokenize("   "));
248 	}
249 
250 	@Test void c04_finalTokenLogic() {
251 		// Test line 136: final token addition logic
252 
253 		// Case 1: Empty final value with trailing comma (lastWasComma = true)
254 		var tokens = tokenize("a,");
255 		assertSize(2, tokens);
256 		assertEquals("a", tokens.get(0).getValue());
257 		assertEquals("", tokens.get(1).getValue()); // Empty token added due to trailing comma
258 
259 		// Case 2: No tokens yet and empty input should create one empty token
260 		// This is handled by error conditions, but let's test a whitespace-only case after comma
261 		tokens = tokenize(",   ");
262 		assertSize(2, tokens);
263 		assertEquals("", tokens.get(0).getValue());
264 		assertEquals("", tokens.get(1).getValue()); // Empty final value but added because of lastWasComma
265 
266 		// Case 3: Non-empty final value should always be added
267 		tokens = tokenize("a,b");
268 		assertSize(2, tokens);
269 		assertEquals("a", tokens.get(0).getValue());
270 		assertEquals("b", tokens.get(1).getValue());
271 
272 		// Case 4: Test with nested content and trailing comma
273 		tokens = tokenize("root{a,},next");
274 		assertSize(2, tokens);
275 		assertToken(tokens.get(0), "root", "a", ""); // Empty token in nested due to trailing comma
276 		assertEquals("next", tokens.get(1).getValue());
277 	}
278 
	//------------------------------------------------------------------------------------------------------------------
	// Token object tests
	//------------------------------------------------------------------------------------------------------------------
282 
283 	@Test void d01_tokenConstruction() {
284 		// Normal construction
285 		var token = new Token("test");
286 		assertEquals("test", token.getValue());
287 		assertFalse(token.hasNested());
288 		assertEmpty(token.getNested());
289 
290 		// Null value handling
291 		token = new Token(null);
292 		assertEquals("", token.getValue());
293 		assertFalse(token.hasNested());
294 	}
295 
296 	@Test void d02_tokenEquality() {
297 		// Simple tokens
298 		var token1 = new Token("test");
299 		var token2 = new Token("test");
300 		var token3 = new Token("other");
301 
302 		assertEquals(token1, token2);
303 		assertNotEquals(token1, token3);
304 		assertEquals(token1.hashCode(), token2.hashCode());
305 
306 		// Tokens with nested content
307 		var nested1 = new Token("parent");
308 		nested1.setNested(l(new Token("child1"), new Token("child2")));
309 
310 		var nested2 = new Token("parent");
311 		nested2.setNested(l(new Token("child1"), new Token("child2")));
312 
313 		assertEquals(nested1, nested2);
314 		assertEquals(nested1.hashCode(), nested2.hashCode());
315 
316 		// Different nested content
317 		var nested3 = new Token("parent");
318 		nested3.setNested(l(new Token("child1"), new Token("different")));
319 
320 		assertNotEquals(nested1, nested3);
321 	}
322 
323 	@Test void d06_tokenEqualsEdgeCases() {
324 		// Test line 229: equals() method edge cases
325 		var token = new Token("test");
326 
327 		// Case 1: Self equality
328 		assertEquals(token, token);
329 
330 		// Case 2: Null comparison
331 		assertNotEquals(token, null);
332 
333 		// Case 3: Different object type
334 		assertNotEquals(token, "not a token");
335 		assertNotEquals(token, Integer.valueOf(42));
336 
337 		// Case 4: Different value, same nested (null)
338 		var other = new Token("different");
339 		assertNotEquals(token, other);
340 
341 		// Case 5: Same value, different nested content
342 		var token1 = new Token("same");
343 		var token2 = new Token("same");
344 		token1.setNested(l(new Token("child1")));
345 		token2.setNested(l(new Token("child2")));
346 		assertNotEquals(token1, token2);
347 
348 		// Case 6: Same value, one has nested, other doesn't
349 		var token3 = new Token("same");
350 		var token4 = new Token("same");
351 		token3.setNested(l(new Token("child")));
352 		// token4 has no nested content
353 		assertNotEquals(token3, token4);
354 
355 		// Case 7: Both have null nested
356 		var token5 = new Token("same");
357 		var token6 = new Token("same");
358 		token5.setNested(null);
359 		token6.setNested(null);
360 		assertEquals(token5, token6);
361 	}
362 
363 	@Test void d03_tokenToString() {
364 		// Simple token
365 		var token = new Token("test");
366 		assertEquals("test", token.toString());
367 
368 		// Token with nested content
369 		token = new Token("parent");
370 		token.setNested(l(new Token("child1"), new Token("child2")));
371 		assertEquals("parent{child1,child2}", token.toString());
372 
373 		// Deep nesting
374 		var child = new Token("child");
375 		child.setNested(l(new Token("grandchild")));
376 		token = new Token("parent");
377 		token.setNested(l(child));
378 		assertEquals("parent{child{grandchild}}", token.toString());
379 	}
380 
381 	@Test void d04_tokenNestedAccess() {
382 		var parent = new Token("parent");
383 
384 		// Initially no nested content
385 		assertFalse(parent.hasNested());
386 		assertEmpty(parent.getNested());
387 
388 	// Add nested content
389 	parent.setNested(l(new Token("child1"), new Token("child2")));
390 	assertTrue(parent.hasNested());
391 	assertSize(2, parent.getNested());
392 
393 		// Verify unmodifiable
394 		var nested = parent.getNested();
395 		assertThrows(UnsupportedOperationException.class, () -> nested.add(new Token("child3")));
396 	}
397 
398 	@Test void d05_hasNestedEdgeCases() {
399 		// Test line 201: hasNested() method edge cases
400 		var token = new Token("test");
401 
402 		// Case 1: null nested list
403 		token.setNested(null);
404 		assertFalse(token.hasNested()); // Should return false when nested is null
405 
406 		// Case 2: empty nested list
407 		token.setNested(list());
408 		assertFalse(token.hasNested()); // Should return false when nested is empty
409 
410 		// Case 3: non-empty nested list
411 		token.setNested(l(new Token("child")));
412 		assertTrue(token.hasNested()); // Should return true when nested has content
413 
414 		// Case 4: nested list with multiple items
415 		token.setNested(l(new Token("child1"), new Token("child2")));
416 		assertTrue(token.hasNested()); // Should return true when nested has multiple items
417 	}
418 
	//------------------------------------------------------------------------------------------------------------------
	// Integration and round-trip tests
	//------------------------------------------------------------------------------------------------------------------
422 
423 	@Test void e01_roundTripTests() {
424 		// Simple cases
425 		assertRoundTrip("foo");
426 		assertRoundTrip("foo,bar,baz");
427 
428 		// Nested cases
429 		assertRoundTrip("foo{a,b}");
430 		assertRoundTrip("foo{a,b},bar{c,d}");
431 
432 		// Deep nesting
433 		assertRoundTrip("root{level1{level2{a,b}}}");
434 
435 		// Complex real-world case
436 		assertRoundTrip("user{name,email},config{timeout,retries}");
437 	}
438 
439 	@Test void e02_performanceTest() {
440 		// Test with large input to ensure reasonable performance
441 		var sb = new StringBuilder();
442 		for (var i = 0; i < 1000; i++) {
443 			if (i > 0) sb.append(",");
444 			sb.append("token").append(i);
445 			if (i % 10 == 0) {
446 				sb.append("{nested").append(i).append(",value").append(i).append("}");
447 			}
448 		}
449 
450 		var start = System.currentTimeMillis();
451 		var tokens = tokenize(sb.toString());
452 		var elapsed = System.currentTimeMillis() - start;
453 
454 		assertTrue(tokens.size() > 900); // Should have many tokens
455 		assertTrue(elapsed < 1000); // Should complete within 1 second
456 	}
457 
	//------------------------------------------------------------------------------------------------------------------
	// Helper methods
	//------------------------------------------------------------------------------------------------------------------
461 
462 	/**
463 	 * Creates a simple token for testing.
464 	 */
465 	private static Token token(String value) {
466 		return new Token(value);
467 	}
468 
469 	/**
470 	 * Asserts that a token has the expected value and nested tokens.
471 	 */
472 	private static void assertToken(Token actual, String expectedValue, String... expectedNested) {
473 		assertEquals(expectedValue, actual.getValue());
474 		if (expectedNested.length == 0) {
475 			assertFalse(actual.hasNested());
476 		} else {
477 			assertTrue(actual.hasNested());
478 			assertEquals(expectedNested.length, actual.getNested().size());
479 			for (var i = 0; i < expectedNested.length; i++) {
480 				assertEquals(expectedNested[i], actual.getNested().get(i).getValue());
481 			}
482 		}
483 	}
484 
485 	/**
486 	 * Tests that parsing and toString are inverse operations.
487 	 */
488 	private static void assertRoundTrip(String input) {
489 		var tokens = tokenize(input);
490 		var rebuilt = tokens.stream()
491 			.map(Token::toString)
492 			.collect(java.util.stream.Collectors.joining(","));
493 		assertEquals(input, rebuilt);
494 	}
495 }