001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.juneau.commons.utils; 018 019import static java.lang.Character.*; 020import static java.nio.charset.StandardCharsets.*; 021import static java.util.stream.Collectors.*; 022import static org.apache.juneau.commons.lang.StateEnum.*; 023import static org.apache.juneau.commons.utils.AssertionUtils.*; 024import static org.apache.juneau.commons.utils.CollectionUtils.*; 025import static org.apache.juneau.commons.utils.IoUtils.*; 026import static org.apache.juneau.commons.utils.ThrowableUtils.*; 027import static org.apache.juneau.commons.utils.Utils.*; 028 029import java.io.*; 030import java.lang.reflect.*; 031import java.math.*; 032import java.net.*; 033import java.nio.*; 034import java.text.*; 035import java.time.*; 036import java.time.format.*; 037import java.util.*; 038import java.util.concurrent.*; 039import java.util.concurrent.atomic.*; 040import java.util.function.*; 041import java.util.regex.*; 042import java.util.stream.*; 043import java.util.zip.*; 044 045import org.apache.juneau.commons.collections.*; 046import org.apache.juneau.commons.lang.*; 047import org.apache.juneau.commons.reflect.*; 048 049/** 050 * 
Reusable string utility methods.
 */
public class StringUtils {

    /** Characters considered common separators (comma/semicolon/colon/pipe/tab). */
    public static final AsciiSet COMMON_SEPARATORS = AsciiSet.of(",;:|\t");

    /** Windows-style carriage-return/line-feed sequence. */
    public static final String CRLF = "\r\n";

    /** Digits 0-9 represented as an {@link AsciiSet}. Built from the same literal as {@link #DIGIT}. */
    public static final AsciiSet DECIMAL_CHARS = AsciiSet.of("0123456789");

    /** Digits 0-9 represented as an {@link AsciiSet}. Built from the same literal as {@link #DECIMAL_CHARS}. */
    public static final AsciiSet DIGIT = AsciiSet.of("0123456789");

    /** Zero-length string constant. */
    public static final String EMPTY = "";

    /** Characters allowed at the beginning of a numeric literal (sign, decimal point, <js>'#'</js>, or a digit). */
    public static final AsciiSet FIRST_NUMBER_CHARS = AsciiSet.of("+-.#0123456789");

    /** Hexadecimal digit characters, both lowercase and uppercase. */
    public static final AsciiSet HEXADECIMAL_CHARS = AsciiSet.of("0123456789abcdefABCDEF");

    /** Characters allowed in HTTP headers (including quoted strings and comments). */
    public static final AsciiSet HTTP_HEADER_CHARS = AsciiSet.create().chars("\t -").ranges("!-[","]-}").build();

    /** Letters a-z and A-Z represented as an {@link AsciiSet}. */
    public static final AsciiSet LETTER = AsciiSet.create().ranges("a-z", "A-Z").build();

    /** Lowercase letters a-z represented as an {@link AsciiSet}. */
    public static final AsciiSet LETTER_LC = AsciiSet.create().range('a', 'z').build();

    /** Uppercase letters A-Z represented as an {@link AsciiSet}. */
    public static final AsciiSet LETTER_UC = AsciiSet.create().range('A', 'Z').build();

    /** Characters escaped when parsing key/value pairs (comma, equals sign, backslash). */
    public static final AsciiSet MAP_ESCAPE_SET = AsciiSet.of(",=\\");

    /** Unix-style newline character. */
    public static final String NEWLINE = "\n";

    /** Predicate that filters out {@code null} and empty strings. Delegates to <c>Utils.ne</c>. */
    public static final Predicate<String> NOT_EMPTY = Utils::ne;

    /**
     * Characters that can appear anywhere in a numeric literal.
     *
     * <p>
     * NOTE(review): <js>'-'</js> is listed twice in the literal; presumably harmless for a set - confirm.
     */
    public static final AsciiSet NUMBER_CHARS = AsciiSet.of("-xX.+-#pP0123456789abcdefABCDEF");

    /** Octal digit characters. */
    public static final AsciiSet OCTAL_CHARS = AsciiSet.of("01234567");

    /** Characters escaped when parsing quoted strings (double quote, single quote, backslash). */
    public static final AsciiSet QUOTE_ESCAPE_SET = AsciiSet.of("\"'\\");

    /** Single-space character constant. */
    public static final String SPACE = " ";

    /** Horizontal tab character constant. */
    public static final String TAB = "\t";

    /**
     * Characters considered part of a URI when encoding paths.
     *
     * <p>
     * NOTE(review): <js>'+'</js> is listed twice in the punctuation literal; presumably harmless for a set - confirm.
     */
    public static final AsciiSet URI_CHARS = AsciiSet.create().chars("?#+%;/:@&=+$,-_.!~*'()").range('0', '9').range('A', 'Z').range('a', 'z').build();

    /** Characters that may appear unescaped in path segments when URL-encoding. */
    public static final AsciiSet URL_ENCODE_PATHINFO_VALIDCHARS = AsciiSet.create().ranges("a-z", "A-Z", "0-9").chars("-_.*/()").build();

    /**
     * Characters that never require URL encoding per RFC 3986.
     *
     * <p>
     * NOTE(review): besides letters and digits the literal contains <c>-_.!~*'()</c> plus a backslash. RFC 3986
     * lists only <c>-._~</c> as unreserved punctuation, so the RFC reference looks loose - confirm the intended source.
     */
    public static final AsciiSet URL_UNENCODED_CHARS = AsciiSet.create().ranges("a-z", "A-Z", "0-9").chars("-_.!~*'()\\").build();

    /**
     * Extended set of characters that are typically safe to leave unencoded.
     *
     * <p>
     * Built as a copy of {@link #URL_UNENCODED_CHARS} with <c>:@$,</c> and <c>{}|\^[]`</c> added.
     */
    public static final AsciiSet URL_UNENCODED_LAX_CHARS = URL_UNENCODED_CHARS.copy().chars(":@$,").chars("{}|\\^[]`").build();

    /** Vowel characters a, e, i, o, u (both uppercase and lowercase) represented as an {@link AsciiSet}. */
    public static final AsciiSet VOWEL = AsciiSet.of("aeiouAEIOU");

    /**
     * All standard whitespace characters (space, tab, newline, carriage return, form feed, vertical tab).
     */
    public static final AsciiSet WHITESPACE_CHARS = AsciiSet.of(" \t\n\r\f\u000B");

    // Maps 6-bit values (0-63) to BASE64 characters.
    private static final char[] BASE64M1 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    // Maps BASE64 characters (used as an index, so 7-bit ASCII only) back to their 6-bit values.
    private static final byte[] BASE64M2 = new byte[128];

    // Fills in the reverse table for the 64 alphabet characters. Every other slot keeps the array
    // default of 0, which is also the value assigned to 'A'.
    static {
        for (var i = 0; i < 64; i++)
            BASE64M2[BASE64M1[i]] = (byte)i;
    }

    // Shared java.util.Random instance; its users are not part of this section of the file.
    private static final Random RANDOM = new Random();

    /**
     * Floating-point literal validation pattern used by {@link Double#valueOf(String)}.
     *
     * <p>
     * Matches decimal, hexadecimal, and scientific-notation literals, as well as {@code NaN} and {@code Infinity},
     * with optional sign, exponent, and type suffixes. Copied from the JDK source to keep parsing logic consistent.
     *
     * <p>
     * The pattern ends with <c>[\x00-\x20]*</c>, so trailing characters in that range are accepted as well.
     */
    public static final Pattern FP_REGEX = Pattern.compile(
        "[+-]?(NaN|Infinity|((((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)|(((0[xX](\\p{XDigit}+)(\\.)?)|(0[xX](\\p{XDigit}+)?(\\.)(\\p{XDigit}+)))[pP][+-]?(\\p{Digit}+)))[fFdD]?))[\\x00-\\x20]*" // NOSONAR
    );

    // Concurrent map from a character to an AsciiSet.
    // Presumably caches escape sets keyed by the quote/escape character - TODO confirm against its users.
    static final Map<Character,AsciiSet> ESCAPE_SETS = new ConcurrentHashMap<>();

    // Readifier list produced once by loadReadifiers() (defined outside this section) during class initialization.
    private static final List<Readifier> READIFIERS = loadReadifiers();

    // Cache from a class to a function that converts an object to a String; created with the weak() option.
    private static final Cache<Class<?>,Function<Object,String>> READIFIER_CACHE = Cache.<Class<?>,Function<Object,String>>create().weak().build();

    // Uppercase hexadecimal digits, indexed by their value 0-15.
    private static final char[] HEX = "0123456789ABCDEF".toCharArray();

    /**
     * Abbreviates a string using ellipses if it exceeds the specified length.
     *
     * <p>
     * If the string is longer than the specified length, it is truncated and <js>"..."</js> is appended.
     * The total length of the result will be exactly <c>length</c> characters (including the ellipses).
     *
     * <p>
     * If the string is <jk>null</jk>, shorter than or equal to <c>length</c>, or has length 3 or less,
     * the original string is returned unchanged.
171 * 172 * <h5 class='section'>Example:</h5> 173 * <p class='bjava'> 174 * abbreviate(<js>"Hello World"</js>, 8); <jc>// "Hello..."</jc> 175 * abbreviate(<js>"Hello World"</js>, 20); <jc>// "Hello World" (no change)</jc> 176 * abbreviate(<js>"Hi"</js>, 5); <jc>// "Hi" (too short to abbreviate)</jc> 177 * abbreviate(<jk>null</jk>, 10); <jc>// null</jc> 178 * </p> 179 * 180 * @param in The input string. Can be <jk>null</jk>. 181 * @param length The maximum length of the resulting string (must be at least 4 for abbreviation to occur). 182 * @return The abbreviated string with ellipses, or the original string if no abbreviation is needed. 183 */ 184 public static String abbreviate(String in, int length) { 185 if (in == null || in.length() <= length || in.length() <= 3) 186 return in; 187 return in.substring(0, length - 3) + "..."; 188 } 189 190 /** 191 * Appends a string to a StringBuilder, creating a new one if null. 192 * 193 * @param sb The StringBuilder to append to, or <jk>null</jk> to create a new one. 194 * @param in The string to append. 195 * @return The StringBuilder with the string appended. 196 */ 197 public static StringBuilder append(StringBuilder sb, String in) { 198 if (sb == null) 199 return new StringBuilder(in); 200 sb.append(in); 201 return sb; 202 } 203 204 /** 205 * Appends a string to a StringBuilder if the string is not blank. 206 * 207 * <p> 208 * Returns the same StringBuilder instance for method chaining. 209 * If the string is <jk>null</jk>, empty, or contains only whitespace, nothing is appended. 210 * If <c>sb</c> is <jk>null</jk> and an append is going to occur, a new StringBuilder is automatically created. 
211 * 212 * <h5 class='section'>Examples:</h5> 213 * <p class='bjava'> 214 * StringBuilder <jv>sb</jv> = <jk>new</jk> StringBuilder(); 215 * appendIfNotBlank(<jv>sb</jv>, <js>"hello"</js>); <jc>// Appends "hello"</jc> 216 * appendIfNotBlank(<jv>sb</jv>, <js>" "</js>); <jc>// Does nothing</jc> 217 * appendIfNotBlank(<jv>sb</jv>, <jk>null</jk>); <jc>// Does nothing</jc> 218 * appendIfNotBlank(<jv>sb</jv>, <js>"world"</js>); <jc>// Appends "world"</jc> 219 * <jc>// Result: "helloworld"</jc> 220 * 221 * <jc>// Auto-create StringBuilder if null and append occurs</jc> 222 * StringBuilder <jv>sb2</jv> = appendIfNotBlank(<jk>null</jk>, <js>"test"</js>); <jc>// Creates new StringBuilder with "test"</jc> 223 * StringBuilder <jv>sb3</jv> = appendIfNotBlank(<jk>null</jk>, <js>" "</js>); <jc>// Returns null (no append occurred)</jc> 224 * </p> 225 * 226 * @param sb The StringBuilder to append to. Can be <jk>null</jk>. 227 * @param str The string to append if not blank. Can be <jk>null</jk>. 228 * @return The same StringBuilder instance for method chaining, or a new StringBuilder if <c>sb</c> was <jk>null</jk> and an append occurred, or <jk>null</jk> if <c>sb</c> was <jk>null</jk> and no append occurred. 229 */ 230 public static StringBuilder appendIfNotBlank(StringBuilder sb, String str) { 231 if (isNotBlank(str)) { 232 if (sb == null) 233 sb = new StringBuilder(); 234 sb.append(str); 235 } 236 return sb; 237 } 238 239 /** 240 * Appends a string to a StringBuilder if the string is not empty. 241 * 242 * <p> 243 * Returns the same StringBuilder instance for method chaining. 244 * If the string is <jk>null</jk> or empty, nothing is appended. 245 * If <c>sb</c> is <jk>null</jk> and an append is going to occur, a new StringBuilder is automatically created. 
246 * 247 * <h5 class='section'>Examples:</h5> 248 * <p class='bjava'> 249 * StringBuilder <jv>sb</jv> = <jk>new</jk> StringBuilder(); 250 * appendIfNotEmpty(<jv>sb</jv>, <js>"hello"</js>); <jc>// Appends "hello"</jc> 251 * appendIfNotEmpty(<jv>sb</jv>, <js>""</js>); <jc>// Does nothing</jc> 252 * appendIfNotEmpty(<jv>sb</jv>, <jk>null</jk>); <jc>// Does nothing</jc> 253 * appendIfNotEmpty(<jv>sb</jv>, <js>"world"</js>); <jc>// Appends "world"</jc> 254 * <jc>// Result: "helloworld"</jc> 255 * 256 * <jc>// Auto-create StringBuilder if null and append occurs</jc> 257 * StringBuilder <jv>sb2</jv> = appendIfNotEmpty(<jk>null</jk>, <js>"test"</js>); <jc>// Creates new StringBuilder with "test"</jc> 258 * StringBuilder <jv>sb3</jv> = appendIfNotEmpty(<jk>null</jk>, <jk>null</jk>); <jc>// Returns null (no append occurred)</jc> 259 * </p> 260 * 261 * @param sb The StringBuilder to append to. Can be <jk>null</jk>. 262 * @param str The string to append if not empty. Can be <jk>null</jk>. 263 * @return The same StringBuilder instance for method chaining, or a new StringBuilder if <c>sb</c> was <jk>null</jk> and an append occurred, or <jk>null</jk> if <c>sb</c> was <jk>null</jk> and no append occurred. 264 */ 265 public static StringBuilder appendIfNotEmpty(StringBuilder sb, String str) { 266 if (ne(str)) { 267 if (sb == null) 268 sb = new StringBuilder(); 269 sb.append(str); 270 } 271 return sb; 272 } 273 274 /** 275 * Appends a string to a StringBuilder with a separator, only adding the separator if the StringBuilder is not empty. 276 * 277 * <p> 278 * Returns the same StringBuilder instance for method chaining. 279 * If the StringBuilder is empty, only the string is appended (no separator). 280 * If the StringBuilder is not empty, the separator is appended first, then the string. 281 * If <c>sb</c> is <jk>null</jk> and an append is going to occur, a new StringBuilder is automatically created. 
282 * 283 * <h5 class='section'>Examples:</h5> 284 * <p class='bjava'> 285 * StringBuilder <jv>sb</jv> = <jk>new</jk> StringBuilder(); 286 * appendWithSeparator(<jv>sb</jv>, <js>"first"</js>, <js>", "</js>); <jc>// Appends "first"</jc> 287 * appendWithSeparator(<jv>sb</jv>, <js>"second"</js>, <js>", "</js>); <jc>// Appends ", second"</jc> 288 * appendWithSeparator(<jv>sb</jv>, <js>"third"</js>, <js>", "</js>); <jc>// Appends ", third"</jc> 289 * <jc>// Result: "first, second, third"</jc> 290 * 291 * <jc>// Auto-create StringBuilder if null and append occurs</jc> 292 * StringBuilder <jv>sb2</jv> = appendWithSeparator(<jk>null</jk>, <js>"test"</js>, <js>", "</js>); <jc>// Creates new StringBuilder with "test"</jc> 293 * StringBuilder <jv>sb3</jv> = appendWithSeparator(<jk>null</jk>, <jk>null</jk>, <js>", "</js>); <jc>// Returns null (no append occurred)</jc> 294 * </p> 295 * 296 * @param sb The StringBuilder to append to. Can be <jk>null</jk>. 297 * @param str The string to append. Can be <jk>null</jk>. 298 * @param separator The separator to add before the string if the StringBuilder is not empty. Can be <jk>null</jk>. 299 * @return The same StringBuilder instance for method chaining, or a new StringBuilder if <c>sb</c> was <jk>null</jk> and an append occurred, or <jk>null</jk> if <c>sb</c> was <jk>null</jk> and no append occurred. 300 */ 301 public static StringBuilder appendWithSeparator(StringBuilder sb, String str, String separator) { 302 if (str != null) { 303 if (sb == null) 304 sb = new StringBuilder(); 305 else if (sb.length() > 0 && separator != null) 306 sb.append(separator); 307 sb.append(str); 308 } 309 return sb; 310 } 311 312 /** 313 * Adds the appropriate indefinite article ('a' or 'an') before a word. 314 * 315 * <p>Uses a simple vowel-based rule: 'an' if the word starts with a vowel, 'a' otherwise. 316 * 317 * @param subject The word to articlize. 318 * @return The word with 'a' or 'an' prepended. 
319 */ 320 public static String articlized(String subject) { 321 return (VOWEL.contains(subject.charAt(0)) ? "an " : "a ") + subject; 322 } 323 324 /** 325 * BASE64-decodes the specified string. 326 * 327 * @param in The BASE-64 encoded string. 328 * @return The decoded byte array, or null if the input was <jk>null</jk>. 329 */ 330 public static byte[] base64Decode(String in) { 331 if (in == null) 332 return null; // NOSONAR - Intentional. 333 334 var bIn = in.getBytes(UTF8); 335 336 assertArg(bIn.length % 4 == 0, "Invalid BASE64 string length. Must be multiple of 4."); 337 338 // Strip out any trailing '=' filler characters. 339 var inLength = bIn.length; 340 while (inLength > 0 && bIn[inLength - 1] == '=') 341 inLength--; 342 343 var outLength = (inLength * 3) / 4; 344 var out = new byte[outLength]; 345 var iIn = 0; 346 var iOut = 0; 347 while (iIn < inLength) { 348 var i0 = bIn[iIn++]; 349 var i1 = bIn[iIn++]; 350 var i2 = iIn < inLength ? bIn[iIn++] : 'A'; 351 var i3 = iIn < inLength ? bIn[iIn++] : 'A'; 352 var b0 = BASE64M2[i0]; 353 var b1 = BASE64M2[i1]; 354 var b2 = BASE64M2[i2]; 355 var b3 = BASE64M2[i3]; 356 var o0 = (b0 << 2) | (b1 >>> 4); 357 var o1 = ((b1 & 0xf) << 4) | (b2 >>> 2); 358 var o2 = ((b2 & 3) << 6) | b3; 359 out[iOut++] = (byte)o0; 360 if (iOut < outLength) 361 out[iOut++] = (byte)o1; 362 if (iOut < outLength) 363 out[iOut++] = (byte)o2; 364 } 365 return out; 366 } 367 368 /** 369 * Shortcut for calling <c>base64Decode(String)</c> and converting the result to a UTF-8 encoded string. 370 * 371 * @param in The BASE-64 encoded string to decode. 372 * @return The decoded string. 373 */ 374 public static String base64DecodeToString(String in) { 375 var b = base64Decode(in); 376 if (b == null) 377 return null; 378 return new String(b, UTF8); 379 } 380 381 /** 382 * BASE64-encodes the specified byte array. 383 * 384 * @param in The input byte array to convert. 385 * @return The byte array converted to a BASE-64 encoded string. 
386 */ 387 public static String base64Encode(byte[] in) { 388 if (in == null) 389 return null; 390 var outLength = (in.length * 4 + 2) / 3; // Output length without padding 391 var out = new char[((in.length + 2) / 3) * 4]; // Length includes padding. 392 var iIn = 0; 393 var iOut = 0; 394 while (iIn < in.length) { 395 var i0 = in[iIn++] & 0xff; 396 var i1 = iIn < in.length ? in[iIn++] & 0xff : 0; 397 var i2 = iIn < in.length ? in[iIn++] & 0xff : 0; 398 var o0 = i0 >>> 2; 399 var o1 = ((i0 & 3) << 4) | (i1 >>> 4); 400 var o2 = ((i1 & 0xf) << 2) | (i2 >>> 6); 401 var o3 = i2 & 0x3F; 402 out[iOut++] = BASE64M1[o0]; 403 out[iOut++] = BASE64M1[o1]; 404 out[iOut] = iOut < outLength ? BASE64M1[o2] : '='; 405 iOut++; 406 out[iOut] = iOut < outLength ? BASE64M1[o3] : '='; 407 iOut++; 408 } 409 return new String(out); 410 } 411 412 /** 413 * Shortcut for calling <code>base64Encode(in.getBytes(<js>"UTF-8"</js>))</code> 414 * 415 * @param in The input string to convert. 416 * @return The string converted to BASE-64 encoding. 417 */ 418 public static String base64EncodeToString(String in) { 419 if (in == null) 420 return null; 421 return base64Encode(in.getBytes(UTF8)); 422 } 423 424 /** 425 * Builds a string using a functional approach with a StringBuilder. 426 * 427 * <p> 428 * Creates a new StringBuilder, applies the consumer to it, and returns the resulting string. 429 * This provides a functional way to build strings without manually managing the StringBuilder. 
430 * 431 * <h5 class='section'>Examples:</h5> 432 * <p class='bjava'> 433 * String <jv>result</jv> = buildString(<jv>sb</jv> -> { 434 * <jv>sb</jv>.append(<js>"Hello"</js>); 435 * <jv>sb</jv>.append(<js>" "</js>); 436 * <jv>sb</jv>.append(<js>"World"</js>); 437 * }); 438 * <jc>// Returns: "Hello World"</jc> 439 * 440 * String <jv>joined</jv> = buildString(<jv>sb</jv> -> { 441 * appendWithSeparator(<jv>sb</jv>, <js>"a"</js>, <js>", "</js>); 442 * appendWithSeparator(<jv>sb</jv>, <js>"b"</js>, <js>", "</js>); 443 * appendWithSeparator(<jv>sb</jv>, <js>"c"</js>, <js>", "</js>); 444 * }); 445 * <jc>// Returns: "a, b, c"</jc> 446 * </p> 447 * 448 * @param builder The consumer that builds the string using the provided StringBuilder. 449 * @return The built string. 450 * @throws IllegalArgumentException If <c>builder</c> is <jk>null</jk>. 451 */ 452 public static String buildString(Consumer<StringBuilder> builder) { 453 assertArgNotNull("builder", builder); 454 var sb = new StringBuilder(); 455 builder.accept(sb); 456 return sb.toString(); 457 } 458 459 /** 460 * Converts a string to camelCase format. 
461 * 462 * <p> 463 * Handles various input formats: 464 * <ul> 465 * <li>Space-separated: "hello world" → "helloWorld"</li> 466 * <li>Underscore-separated: "hello_world" → "helloWorld"</li> 467 * <li>Hyphen-separated: "hello-world" → "helloWorld"</li> 468 * <li>PascalCase: "HelloWorld" → "helloWorld"</li> 469 * <li>Already camelCase: "helloWorld" → "helloWorld"</li> 470 * <li>Mixed case: "Hello_World-Test" → "helloWorldTest"</li> 471 * </ul> 472 * 473 * <h5 class='section'>Example:</h5> 474 * <p class='bjava'> 475 * camelCase(<jk>null</jk>); <jc>// null</jc> 476 * camelCase(<js>""</js>); <jc>// ""</jc> 477 * camelCase(<js>"hello world"</js>); <jc>// "helloWorld"</jc> 478 * camelCase(<js>"hello_world"</js>); <jc>// "helloWorld"</jc> 479 * camelCase(<js>"hello-world"</js>); <jc>// "helloWorld"</jc> 480 * camelCase(<js>"HelloWorld"</js>); <jc>// "helloWorld"</jc> 481 * camelCase(<js>"helloWorld"</js>); <jc>// "helloWorld"</jc> 482 * camelCase(<js>" hello world "</js>); <jc>// "helloWorld"</jc> 483 * </p> 484 * 485 * @param str The string to convert. 486 * @return The camelCase string, or <jk>null</jk> if input is <jk>null</jk>. 487 */ 488 public static String camelCase(String str) { 489 if (isEmpty(str)) 490 return str; 491 492 var words = splitWords(str); 493 if (words.isEmpty()) 494 return ""; 495 496 var result = new StringBuilder(); 497 for (var i = 0; i < words.size(); i++) { 498 var word = words.get(i); 499 if (i == 0) { 500 result.append(uncapitalize(word)); 501 } else { 502 result.append(capitalize(word.toLowerCase())); 503 } 504 } 505 506 return result.toString(); 507 } 508 509 /** 510 * Capitalizes the first character of a string. 
511 * 512 * <h5 class='section'>Example:</h5> 513 * <p class='bjava'> 514 * capitalize(<jk>null</jk>); <jc>// null</jc> 515 * capitalize(<js>""</js>); <jc>// ""</jc> 516 * capitalize(<js>"hello"</js>); <jc>// "Hello"</jc> 517 * capitalize(<js>"Hello"</js>); <jc>// "Hello"</jc> 518 * capitalize(<js>"HELLO"</js>); <jc>// "HELLO"</jc> 519 * </p> 520 * 521 * @param str The string to capitalize. 522 * @return The string with the first character capitalized, or <jk>null</jk> if input is <jk>null</jk>. 523 */ 524 public static String capitalize(String str) { 525 if (isEmpty(str)) 526 return str; 527 return Character.toUpperCase(str.charAt(0)) + str.substring(1); 528 } 529 530 /** 531 * Converts a comma-delimited string to a list. 532 * 533 * @param s The comma-delimited string. 534 * @return A new modifiable list. Never <jk>null</jk>. 535 */ 536 public static List<String> cdlToList(String s) { 537 return split(s); 538 } 539 540 /** 541 * Converts a comma-delimited string to a set. 542 * 543 * @param s The comma-delimited string. 544 * @return A new {@link LinkedHashSet}. Never <jk>null</jk>. 545 */ 546 public static LinkedHashSet<String> cdlToSet(String s) { 547 return split(s).stream().collect(Collectors.toCollection(LinkedHashSet::new)); 548 } 549 550 /** 551 * Returns the character at the specified index in the string without throwing exceptions. 552 * 553 * <p> 554 * This is a null-safe and bounds-safe version of {@link String#charAt(int)}. 
555 * Returns <c>0</c> (null character) if: 556 * <ul> 557 * <li>The string is <jk>null</jk></li> 558 * <li>The index is negative</li> 559 * <li>The index is greater than or equal to the string length</li> 560 * </ul> 561 * 562 * <h5 class='section'>Example:</h5> 563 * <p class='bjava'> 564 * charAt(<js>"Hello"</js>, 0); <jc>// 'H'</jc> 565 * charAt(<js>"Hello"</js>, 4); <jc>// 'o'</jc> 566 * charAt(<js>"Hello"</js>, 5); <jc>// 0 (out of bounds)</jc> 567 * charAt(<js>"Hello"</js>, -1); <jc>// 0 (out of bounds)</jc> 568 * charAt(<jk>null</jk>, 0); <jc>// 0 (null string)</jc> 569 * </p> 570 * 571 * @param s The string. 572 * @param i The index position. 573 * @return The character at the specified index, or <c>0</c> if the index is out-of-range or the string is <jk>null</jk>. 574 * @see String#charAt(int) 575 */ 576 public static char charAt(String s, int i) { 577 if (s == null || i < 0 || i >= s.length()) 578 return 0; 579 return s.charAt(i); 580 } 581 582 /** 583 * Cleans a string by removing control characters and normalizing whitespace. 584 * 585 * <h5 class='section'>Example:</h5> 586 * <p class='bjava'> 587 * clean(<js>"hello\u0000\u0001world"</js>); <jc>// "hello world"</jc> 588 * clean(<js>"hello \t\n world"</js>); <jc>// "hello world"</jc> 589 * </p> 590 * 591 * @param str The string to clean. 592 * @return The cleaned string, or <jk>null</jk> if input is <jk>null</jk>. 593 */ 594 public static String clean(String str) { 595 if (str == null) 596 return null; 597 str = removeControlChars(str); 598 return normalizeWhitespace(str); 599 } 600 601 /** 602 * Compares two strings lexicographically, but gracefully handles <jk>null</jk> values. 
603 * 604 * <p> 605 * Null handling: 606 * <ul> 607 * <li>Both <jk>null</jk> → returns <c>0</c> (equal)</li> 608 * <li>First <jk>null</jk> → returns {@link Integer#MIN_VALUE}</li> 609 * <li>Second <jk>null</jk> → returns {@link Integer#MAX_VALUE}</li> 610 * <li>Neither <jk>null</jk> → returns the same as {@link String#compareTo(String)}</li> 611 * </ul> 612 * 613 * <h5 class='section'>Example:</h5> 614 * <p class='bjava'> 615 * compare(<js>"apple"</js>, <js>"banana"</js>); <jc>// negative (apple < banana)</jc> 616 * compare(<js>"banana"</js>, <js>"apple"</js>); <jc>// positive (banana > apple)</jc> 617 * compare(<js>"apple"</js>, <js>"apple"</js>); <jc>// 0 (equal)</jc> 618 * compare(<jk>null</jk>, <jk>null</jk>); <jc>// 0 (equal)</jc> 619 * compare(<jk>null</jk>, <js>"apple"</js>); <jc>// Integer.MIN_VALUE</jc> 620 * compare(<js>"apple"</js>, <jk>null</jk>); <jc>// Integer.MAX_VALUE</jc> 621 * </p> 622 * 623 * @param s1 The first string. 624 * @param s2 The second string. 625 * @return A negative integer, zero, or a positive integer as the first string is less than, equal to, or greater than the second. 626 * @see String#compareTo(String) 627 */ 628 public static int compare(String s1, String s2) { 629 if (s1 == null && s2 == null) 630 return 0; 631 if (s1 == null) 632 return Integer.MIN_VALUE; 633 if (s2 == null) 634 return Integer.MAX_VALUE; 635 return s1.compareTo(s2); 636 } 637 638 /** 639 * Compares two strings lexicographically, ignoring case. 640 * 641 * <p> 642 * Returns a negative integer, zero, or a positive integer as the first string is less than, equal to, or greater than the second string, ignoring case. 
643 * 644 * <h5 class='section'>Example:</h5> 645 * <p class='bjava'> 646 * compareIgnoreCase(<js>"apple"</js>, <js>"BANANA"</js>); <jc>// negative (apple < banana)</jc> 647 * compareIgnoreCase(<js>"Hello"</js>, <js>"hello"</js>); <jc>// 0 (equal)</jc> 648 * compareIgnoreCase(<js>"Zebra"</js>, <js>"apple"</js>); <jc>// positive (zebra > apple)</jc> 649 * </p> 650 * 651 * @param str1 The first string. 652 * @param str2 The second string. 653 * @return A negative integer, zero, or a positive integer as the first string is less than, equal to, or greater than the second. 654 */ 655 public static int compareIgnoreCase(String str1, String str2) { 656 if (str1 == str2) 657 return 0; 658 if (str1 == null) 659 return -1; 660 if (str2 == null) 661 return 1; 662 return str1.compareToIgnoreCase(str2); 663 } 664 665 /** 666 * Compresses a UTF-8 string into a GZIP-compressed byte array. 667 * 668 * <p> 669 * This method compresses the input string using GZIP compression. The string is first converted to 670 * UTF-8 bytes, then compressed. Use {@link #decompress(byte[])} to decompress the result. 671 * 672 * <h5 class='section'>Example:</h5> 673 * <p class='bjava'> 674 * <jc>// Compress a string</jc> 675 * byte[] <jv>compressed</jv> = compress(<js>"Hello World"</js>); 676 * 677 * <jc>// Decompress it back</jc> 678 * String <jv>decompressed</jv> = decompress(<jv>compressed</jv>); 679 * <jc>// Returns: "Hello World"</jc> 680 * </p> 681 * 682 * @param contents The UTF-8 string to compress. 683 * @return The GZIP-compressed byte array. 684 * @throws Exception If compression fails. 685 * @see #decompress(byte[]) 686 */ 687 public static byte[] compress(String contents) throws Exception { 688 var baos = new ByteArrayOutputStream(contents.length() >> 1); 689 try (var gos = new GZIPOutputStream(baos)) { 690 gos.write(contents.getBytes()); 691 gos.finish(); 692 gos.flush(); 693 return baos.toByteArray(); 694 } 695 } 696 697 /** 698 * Checks if a string contains the specified character. 
699 * 700 * <p> 701 * This is a null-safe operation. Returns <jk>false</jk> if the string is <jk>null</jk>. 702 * 703 * <h5 class='section'>Example:</h5> 704 * <p class='bjava'> 705 * contains(<js>"Hello World"</js>, <js>'o'</js>); <jc>// true</jc> 706 * contains(<js>"Hello World"</js>, <js>'x'</js>); <jc>// false</jc> 707 * contains(<jk>null</jk>, <js>'a'</js>); <jc>// false</jc> 708 * </p> 709 * 710 * @param s The string to check. 711 * @param c The character to check for. 712 * @return <jk>true</jk> if the string contains the specified character. 713 * @see #contains(String, CharSequence) 714 * @see #contains(String, String) 715 * @see #containsAny(String, char...) 716 */ 717 public static boolean contains(String s, char c) { 718 return s != null && s.indexOf(c) >= 0; 719 } 720 721 /** 722 * Null-safe {@link String#contains(CharSequence)} operation. 723 * 724 * <p> 725 * Returns <jk>false</jk> if the string is <jk>null</jk>, otherwise behaves the same as 726 * {@link String#contains(CharSequence)}. 727 * 728 * <h5 class='section'>Example:</h5> 729 * <p class='bjava'> 730 * contains(<js>"Hello World"</js>, <js>"World"</js>); <jc>// true</jc> 731 * contains(<js>"Hello World"</js>, <js>"Foo"</js>); <jc>// false</jc> 732 * contains(<jk>null</jk>, <js>"Hello"</js>); <jc>// false</jc> 733 * </p> 734 * 735 * @param value The string to check. 736 * @param substring The substring to check for. 737 * @return <jk>true</jk> if the value contains the specified substring, <jk>false</jk> if the string is <jk>null</jk>. 738 * @see #contains(String, char) 739 * @see #contains(String, String) 740 * @see #containsAny(String, CharSequence...) 741 */ 742 public static boolean contains(String value, CharSequence substring) { 743 return nn(value) && value.contains(substring); 744 } 745 746 /** 747 * Null-safe check if a string contains another string. 
748 * 749 * <p> 750 * Returns <jk>false</jk> if the string is <jk>null</jk>, otherwise behaves the same as 751 * {@link String#contains(CharSequence)}. 752 * 753 * <h5 class='section'>Example:</h5> 754 * <p class='bjava'> 755 * contains(<js>"Hello World"</js>, <js>"World"</js>); <jc>// true</jc> 756 * contains(<js>"Hello World"</js>, <js>"Foo"</js>); <jc>// false</jc> 757 * contains(<jk>null</jk>, <js>"Hello"</js>); <jc>// false</jc> 758 * </p> 759 * 760 * @param s The string to check. 761 * @param substring The substring to check for. 762 * @return <jk>true</jk> if the string contains the specified substring, <jk>false</jk> if the string is <jk>null</jk>. 763 * @see #contains(String, CharSequence) 764 * @see #containsAny(String, String...) 765 */ 766 public static boolean contains(String s, String substring) { 767 return nn(s) && s.contains(substring); 768 } 769 770 /** 771 * Checks if a string contains all of the specified characters. 772 * 773 * <p> 774 * This is a null-safe operation that returns <jk>false</jk> if: 775 * <ul> 776 * <li>The string is <jk>null</jk></li> 777 * <li>The values array is <jk>null</jk> or empty</li> 778 * <li>Any of the specified characters are not found in the string</li> 779 * </ul> 780 * 781 * <h5 class='section'>Example:</h5> 782 * <p class='bjava'> 783 * containsAll(<js>"Hello World"</js>, <js>'H'</js>, <js>'e'</js>, <js>'l'</js>); <jc>// true (contains all)</jc> 784 * containsAll(<js>"Hello World"</js>, <js>'H'</js>, <js>'x'</js>); <jc>// false (missing 'x')</jc> 785 * containsAll(<jk>null</jk>, <js>'a'</js>); <jc>// false</jc> 786 * </p> 787 * 788 * @param s The string to check. 789 * @param values The characters to check for. 790 * @return <jk>true</jk> if the string contains all of the specified characters. 791 * @see #containsAny(String, char...) 
792 */ 793 public static boolean containsAll(String s, char...values) { 794 if (s == null || values == null || values.length == 0) 795 return false; 796 for (var v : values) { 797 if (s.indexOf(v) < 0) 798 return false; 799 } 800 return true; 801 } 802 803 /** 804 * Checks if a string contains all of the specified substrings. 805 * 806 * <p> 807 * This is a null-safe operation that returns <jk>false</jk> if: 808 * <ul> 809 * <li>The string is <jk>null</jk></li> 810 * <li>The values array is <jk>null</jk> or empty</li> 811 * <li>Any of the specified substrings are not found in the string</li> 812 * </ul> 813 * 814 * <h5 class='section'>Example:</h5> 815 * <p class='bjava'> 816 * containsAll(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"World"</js>); <jc>// true (contains all)</jc> 817 * containsAll(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"Foo"</js>); <jc>// false (missing "Foo")</jc> 818 * containsAll(<jk>null</jk>, <js>"Hello"</js>); <jc>// false</jc> 819 * </p> 820 * 821 * @param s The string to check. 822 * @param values The substrings to check for. 823 * @return <jk>true</jk> if the string contains all of the specified substrings. 824 * @see #containsAny(String, CharSequence...) 825 */ 826 public static boolean containsAll(String s, CharSequence...values) { 827 if (s == null || values == null || values.length == 0) 828 return false; 829 for (var v : values) { 830 if (! s.contains(v)) 831 return false; 832 } 833 return true; 834 } 835 836 /** 837 * Checks if a string contains all of the specified substrings. 
838 * 839 * <p> 840 * This is a null-safe operation that returns <jk>false</jk> if: 841 * <ul> 842 * <li>The string is <jk>null</jk></li> 843 * <li>The values array is <jk>null</jk> or empty</li> 844 * <li>Any of the specified substrings are not found in the string</li> 845 * </ul> 846 * 847 * <h5 class='section'>Example:</h5> 848 * <p class='bjava'> 849 * containsAll(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"World"</js>); <jc>// true (contains all)</jc> 850 * containsAll(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"Foo"</js>); <jc>// false (missing "Foo")</jc> 851 * containsAll(<jk>null</jk>, <js>"Hello"</js>); <jc>// false</jc> 852 * </p> 853 * 854 * @param s The string to check. 855 * @param values The substrings to check for. 856 * @return <jk>true</jk> if the string contains all of the specified substrings. 857 * @see #containsAny(String, String...) 858 */ 859 public static boolean containsAll(String s, String...values) { 860 if (s == null || values == null || values.length == 0) 861 return false; 862 for (var v : values) { 863 if (! s.contains(v)) 864 return false; 865 } 866 return true; 867 } 868 869 /** 870 * Checks if a string contains any of the specified characters. 871 * 872 * <p> 873 * This is a null-safe operation that returns <jk>false</jk> if: 874 * <ul> 875 * <li>The string is <jk>null</jk></li> 876 * <li>The values array is <jk>null</jk> or empty</li> 877 * <li>None of the specified characters are found in the string</li> 878 * </ul> 879 * 880 * <h5 class='section'>Example:</h5> 881 * <p class='bjava'> 882 * containsAny(<js>"Hello World"</js>, <js>'o'</js>, <js>'x'</js>); <jc>// true (contains 'o')</jc> 883 * containsAny(<js>"Hello World"</js>, <js>'x'</js>, <js>'y'</js>); <jc>// false</jc> 884 * containsAny(<jk>null</jk>, <js>'a'</js>); <jc>// false</jc> 885 * </p> 886 * 887 * @param s The string to check. 888 * @param values The characters to check for. 889 * @return <jk>true</jk> if the string contains any of the specified characters. 
890 * @see #contains(String, char) 891 * @see #containsAll(String, char...) 892 */ 893 public static boolean containsAny(String s, char...values) { 894 if (s == null || values == null || values.length == 0) 895 return false; 896 for (var v : values) { 897 if (s.indexOf(v) >= 0) 898 return true; 899 } 900 return false; 901 } 902 903 /** 904 * Checks if a string contains any of the specified substrings. 905 * 906 * <p> 907 * This is a null-safe operation that returns <jk>false</jk> if: 908 * <ul> 909 * <li>The string is <jk>null</jk></li> 910 * <li>The values array is <jk>null</jk> or empty</li> 911 * <li>None of the specified substrings are found in the string</li> 912 * </ul> 913 * 914 * <h5 class='section'>Example:</h5> 915 * <p class='bjava'> 916 * containsAny(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"Foo"</js>); <jc>// true (contains "Hello")</jc> 917 * containsAny(<js>"Hello World"</js>, <js>"Foo"</js>, <js>"Bar"</js>); <jc>// false</jc> 918 * containsAny(<jk>null</jk>, <js>"Hello"</js>); <jc>// false</jc> 919 * </p> 920 * 921 * @param s The string to check. 922 * @param values The substrings to check for. 923 * @return <jk>true</jk> if the string contains any of the specified substrings. 924 * @see #contains(String, CharSequence) 925 * @see #containsAll(String, CharSequence...) 926 */ 927 public static boolean containsAny(String s, CharSequence...values) { 928 if (s == null || values == null || values.length == 0) 929 return false; 930 for (var v : values) { 931 if (s.contains(v)) 932 return true; 933 } 934 return false; 935 } 936 937 /** 938 * Checks if a string contains any of the specified substrings. 
939 * 940 * <p> 941 * This is a null-safe operation that returns <jk>false</jk> if: 942 * <ul> 943 * <li>The string is <jk>null</jk></li> 944 * <li>The values array is <jk>null</jk> or empty</li> 945 * <li>None of the specified substrings are found in the string</li> 946 * </ul> 947 * 948 * <h5 class='section'>Example:</h5> 949 * <p class='bjava'> 950 * containsAny(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"Foo"</js>); <jc>// true (contains "Hello")</jc> 951 * containsAny(<js>"Hello World"</js>, <js>"Foo"</js>, <js>"Bar"</js>); <jc>// false</jc> 952 * containsAny(<jk>null</jk>, <js>"Hello"</js>); <jc>// false</jc> 953 * containsAny(<js>"Hello"</js>); <jc>// false (no values to check)</jc> 954 * </p> 955 * 956 * @param s The string to check. 957 * @param values The substrings to check for. 958 * @return <jk>true</jk> if the string contains any of the specified substrings. 959 * @see #contains(String, String) 960 * @see #containsAll(String, String...) 961 */ 962 public static boolean containsAny(String s, String...values) { 963 if (s == null || values == null || values.length == 0) 964 return false; 965 for (var v : values) { 966 if (s.contains(v)) 967 return true; 968 } 969 return false; 970 } 971 972 /** 973 * Checks if a string contains a substring, ignoring case. 974 * 975 * <h5 class='section'>Example:</h5> 976 * <p class='bjava'> 977 * containsIgnoreCase(<js>"Hello World"</js>, <js>"world"</js>); <jc>// true</jc> 978 * containsIgnoreCase(<js>"Hello World"</js>, <js>"WORLD"</js>); <jc>// true</jc> 979 * containsIgnoreCase(<js>"hello world"</js>, <js>"xyz"</js>); <jc>// false</jc> 980 * </p> 981 * 982 * @param str The string to search in. 983 * @param search The substring to search for. 984 * @return <jk>true</jk> if the string contains the substring (ignoring case), <jk>false</jk> otherwise. 
985 */ 986 public static boolean containsIgnoreCase(String str, String search) { 987 if (str == null || search == null) 988 return false; 989 return str.toLowerCase().contains(search.toLowerCase()); 990 } 991 992 /** 993 * Counts the number of occurrences of the specified character in the specified string. 994 * 995 * <p> 996 * Returns <c>0</c> if the string is <jk>null</jk>. 997 * 998 * <h5 class='section'>Example:</h5> 999 * <p class='bjava'> 1000 * countChars(<js>"Hello World"</js>, <js>'o'</js>); <jc>// 2</jc> 1001 * countChars(<js>"Hello World"</js>, <js>'x'</js>); <jc>// 0</jc> 1002 * countChars(<jk>null</jk>, <js>'a'</js>); <jc>// 0</jc> 1003 * </p> 1004 * 1005 * @param s The string to check. 1006 * @param c The character to count. 1007 * @return The number of occurrences of the character, or <c>0</c> if the string was <jk>null</jk>. 1008 */ 1009 public static int countChars(String s, char c) { 1010 var count = 0; 1011 if (s == null) 1012 return count; 1013 for (var i = 0; i < s.length(); i++) 1014 if (s.charAt(i) == c) 1015 count++; 1016 return count; 1017 } 1018 1019 /** 1020 * Counts the number of occurrences of a substring within a string. 1021 * 1022 * <h5 class='section'>Example:</h5> 1023 * <p class='bjava'> 1024 * countMatches(<js>"hello world world"</js>, <js>"world"</js>); <jc>// 2</jc> 1025 * countMatches(<js>"ababab"</js>, <js>"ab"</js>); <jc>// 3</jc> 1026 * countMatches(<js>"hello"</js>, <js>"xyz"</js>); <jc>// 0</jc> 1027 * </p> 1028 * 1029 * @param str The string to search in. 1030 * @param search The substring to count. 1031 * @return The number of occurrences, or <c>0</c> if not found or if either parameter is <jk>null</jk> or empty. 
1032 */ 1033 public static int countMatches(String str, String search) { 1034 if (isEmpty(str) || isEmpty(search)) 1035 return 0; 1036 var count = 0; 1037 var index = 0; 1038 while ((index = str.indexOf(search, index)) != -1) { 1039 count++; 1040 index += search.length(); 1041 } 1042 return count; 1043 } 1044 1045 /** 1046 * Debug method for rendering non-ASCII character sequences. 1047 * 1048 * <p> 1049 * Converts non-printable and non-ASCII characters (outside the range <c>0x20-0x7E</c>) to hexadecimal 1050 * sequences in the format <js>"[hex]"</js>. Printable ASCII characters are left unchanged. 1051 * 1052 * <h5 class='section'>Example:</h5> 1053 * <p class='bjava'> 1054 * decodeHex(<js>"Hello"</js>); <jc>// "Hello"</jc> 1055 * decodeHex(<js>"Hello\u0000World"</js>); <jc>// "Hello[0]World"</jc> 1056 * decodeHex(<js>"Hello\u00A9"</js>); <jc>// "Hello[a9]"</jc> 1057 * decodeHex(<jk>null</jk>); <jc>// null</jc> 1058 * </p> 1059 * 1060 * @param s The string to decode. 1061 * @return A string with non-ASCII characters converted to <js>"[hex]"</js> sequences, or <jk>null</jk> if input is <jk>null</jk>. 1062 */ 1063 public static String decodeHex(String s) { 1064 if (s == null) 1065 return null; 1066 var sb = new StringBuilder(); 1067 for (var c : s.toCharArray()) { 1068 if (c < ' ' || c > '~') 1069 sb.append("[").append(Integer.toHexString(c)).append("]"); 1070 else 1071 sb.append(c); 1072 } 1073 return sb.toString(); 1074 } 1075 1076 /** 1077 * Decompresses a GZIP-compressed byte array into a UTF-8 string. 1078 * 1079 * <p> 1080 * This method is the inverse of {@link #compress(String)}. It takes a byte array that was compressed 1081 * using GZIP compression and decompresses it into a UTF-8 encoded string. 
1082 * 1083 * <h5 class='section'>Example:</h5> 1084 * <p class='bjava'> 1085 * <jc>// Compress a string</jc> 1086 * byte[] <jv>compressed</jv> = compress(<js>"Hello World"</js>); 1087 * 1088 * <jc>// Decompress it back</jc> 1089 * String <jv>decompressed</jv> = decompress(<jv>compressed</jv>); 1090 * <jc>// Returns: "Hello World"</jc> 1091 * </p> 1092 * 1093 * @param is The GZIP-compressed byte array to decompress. 1094 * @return The decompressed UTF-8 string. 1095 * @throws Exception If decompression fails or the input is not valid GZIP data. 1096 * @see #compress(String) 1097 */ 1098 public static String decompress(byte[] is) throws Exception { 1099 return read(new GZIPInputStream(new ByteArrayInputStream(is))); 1100 } 1101 1102 /** 1103 * Returns the specified string, or the default string if that string is <jk>null</jk> or blank. 1104 * 1105 * @param str The string value to check. 1106 * @param defaultStr The default string to return if the string is <jk>null</jk> or blank. 1107 * @return The string value, or the default string if the string is <jk>null</jk> or blank. 1108 */ 1109 public static String defaultIfBlank(String str, String defaultStr) { 1110 return isBlank(str) ? defaultStr : str; 1111 } 1112 1113 /** 1114 * Returns the specified string, or the default string if that string is <jk>null</jk> or empty. 1115 * 1116 * @param str The string value to check. 1117 * @param defaultStr The default string to return if the string is <jk>null</jk> or empty. 1118 * @return The string value, or the default string if the string is <jk>null</jk> or empty. 1119 */ 1120 public static String defaultIfEmpty(String str, String defaultStr) { 1121 return isEmpty(str) ? defaultStr : str; 1122 } 1123 1124 /** 1125 * Finds the position where the two strings first differ. 1126 * 1127 * <p> 1128 * This method compares strings character by character and returns the index of the first position 1129 * where they differ. If the strings are equal, returns <c>-1</c>. 
If one string is a prefix of the other, 1130 * returns the length of the shorter string. 1131 * 1132 * <h5 class='section'>Example:</h5> 1133 * <p class='bjava'> 1134 * diffPosition(<js>"apple"</js>, <js>"apple"</js>); <jc>// -1 (equal)</jc> 1135 * diffPosition(<js>"apple"</js>, <js>"apricot"</js>); <jc>// 2 (differs at 'p' vs 'r')</jc> 1136 * diffPosition(<js>"apple"</js>, <js>"app"</js>); <jc>// 3 (shorter string ends here)</jc> 1137 * diffPosition(<js>"app"</js>, <js>"apple"</js>); <jc>// 3 (shorter string ends here)</jc> 1138 * </p> 1139 * 1140 * @param s1 The first string. 1141 * @param s2 The second string. 1142 * @return The position where the two strings differ, or <c>-1</c> if they're equal. 1143 * @see #diffPositionIc(String, String) 1144 */ 1145 public static int diffPosition(String s1, String s2) { 1146 s1 = emptyIfNull(s1); 1147 s2 = emptyIfNull(s2); 1148 var i = 0; 1149 var len = Math.min(s1.length(), s2.length()); 1150 while (i < len) { 1151 var j = s1.charAt(i) - s2.charAt(i); 1152 if (j != 0) 1153 return i; 1154 i++; 1155 } 1156 if (eq(s1.length(), s2.length())) 1157 return -1; 1158 return i; 1159 } 1160 1161 /** 1162 * Finds the position where the two strings first differ, ignoring case. 1163 * 1164 * <p> 1165 * This method compares strings character by character (case-insensitive) and returns the index of the first position 1166 * where they differ. If the strings are equal (ignoring case), returns <c>-1</c>. If one string is a prefix of the other, 1167 * returns the length of the shorter string. 1168 * 1169 * <h5 class='section'>Example:</h5> 1170 * <p class='bjava'> 1171 * diffPositionIc(<js>"Apple"</js>, <js>"apple"</js>); <jc>// -1 (equal ignoring case)</jc> 1172 * diffPositionIc(<js>"Apple"</js>, <js>"Apricot"</js>); <jc>// 2 (differs at 'p' vs 'r')</jc> 1173 * diffPositionIc(<js>"APPLE"</js>, <js>"app"</js>); <jc>// 3 (shorter string ends here)</jc> 1174 * </p> 1175 * 1176 * @param s1 The first string. 1177 * @param s2 The second string. 
1178 * @return The position where the two strings differ, or <c>-1</c> if they're equal (ignoring case). 1179 * @see #diffPosition(String, String) 1180 */ 1181 public static int diffPositionIc(String s1, String s2) { 1182 s1 = emptyIfNull(s1); 1183 s2 = emptyIfNull(s2); 1184 var i = 0; 1185 var len = Math.min(s1.length(), s2.length()); 1186 while (i < len) { 1187 var j = toLowerCase(s1.charAt(i)) - toLowerCase(s2.charAt(i)); 1188 if (j != 0) 1189 return i; 1190 i++; 1191 } 1192 if (eq(s1.length(), s2.length())) 1193 return -1; 1194 return i; 1195 } 1196 1197 /** 1198 * Removes duplicate elements from a string array, preserving order. 1199 * 1200 * <p> 1201 * Returns <jk>null</jk> if the array is <jk>null</jk>. 1202 * Uses a {@link LinkedHashSet} to preserve insertion order while removing duplicates. 1203 * 1204 * <h5 class='section'>Examples:</h5> 1205 * <p class='bjava'> 1206 * String[] <jv>array</jv> = {<js>"foo"</js>, <js>"bar"</js>, <js>"foo"</js>, <js>"baz"</js>, <js>"bar"</js>}; 1207 * String[] <jv>unique</jv> = distinct(<jv>array</jv>); 1208 * <jc>// Returns: ["foo", "bar", "baz"]</jc> 1209 * </p> 1210 * 1211 * @param array The array to process. Can be <jk>null</jk>. 1212 * @return A new array with duplicate elements removed, or <jk>null</jk> if the array was <jk>null</jk>. 1213 */ 1214 public static String[] distinct(String[] array) { 1215 if (array == null) 1216 return null; // NOSONAR - Intentional. 1217 return Arrays.stream(array).collect(Collectors.toCollection(LinkedHashSet::new)).toArray(new String[0]); 1218 } 1219 1220 /** 1221 * Generates a Double Metaphone code for a string. 1222 * 1223 * <p> 1224 * Double Metaphone is an improved version of Metaphone that returns two codes: 1225 * a primary code and an alternate code. This handles more edge cases and variations. 
1226 * 1227 * <h5 class='section'>Example:</h5> 1228 * <p class='bjava'> 1229 * doubleMetaphone(<js>"Smith"</js>); <jc>// "SM0"</jc> 1230 * doubleMetaphone(<js>"Schmidt"</js>); <jc>// "XMT"</jc> 1231 * </p> 1232 * 1233 * @param str The string to generate a Double Metaphone code for. Can be <jk>null</jk>. 1234 * @return An array with two elements: [primary code, alternate code]. Returns <jk>null</jk> if input is <jk>null</jk> or empty. 1235 */ 1236 public static String[] doubleMetaphone(String str) { 1237 if (isEmpty(str)) 1238 return null; 1239 1240 // For simplicity, return the same code for both primary and alternate 1241 // A full Double Metaphone implementation would be much more complex 1242 var primary = metaphone(str); 1243 1244 // Generate alternate code (simplified - full implementation would have different rules) 1245 var alternate = primary; 1246 1247 return a(primary, alternate); 1248 } 1249 1250 /** 1251 * Returns the specified string, or an empty string if that string is <jk>null</jk>. 1252 * 1253 * @param str The string value to check. 1254 * @return The string value, or an empty string if the string is <jk>null</jk>. 1255 */ 1256 public static String emptyIfNull(String str) { 1257 return str == null ? "" : str; 1258 } 1259 1260 /** 1261 * Checks if a string ends with the specified character. 1262 * 1263 * <p> 1264 * This is a null-safe operation. Returns <jk>false</jk> if the string is <jk>null</jk> or empty. 1265 * 1266 * <h5 class='section'>Example:</h5> 1267 * <p class='bjava'> 1268 * endsWith(<js>"Hello"</js>, <js>'o'</js>); <jc>// true</jc> 1269 * endsWith(<js>"Hello"</js>, <js>'H'</js>); <jc>// false</jc> 1270 * endsWith(<jk>null</jk>, <js>'o'</js>); <jc>// false</jc> 1271 * endsWith(<js>""</js>, <js>'o'</js>); <jc>// false</jc> 1272 * </p> 1273 * 1274 * @param s The string to check. Can be <jk>null</jk>. 1275 * @param c The character to check for. 
1276 * @return <jk>true</jk> if the specified string is not <jk>null</jk> and ends with the specified character. 1277 * @see #endsWith(String, String) 1278 * @see #endsWithAny(String, char...) 1279 * @see String#endsWith(String) 1280 */ 1281 public static boolean endsWith(String s, char c) { 1282 if (nn(s)) { 1283 var i = s.length(); 1284 if (i > 0) 1285 return s.charAt(i - 1) == c; 1286 } 1287 return false; 1288 } 1289 1290 /** 1291 * Checks if a string ends with the specified string. 1292 * 1293 * <p> 1294 * This is a null-safe operation. Returns <jk>false</jk> if the string is <jk>null</jk>. 1295 * Otherwise behaves the same as {@link String#endsWith(String)}. 1296 * 1297 * <h5 class='section'>Example:</h5> 1298 * <p class='bjava'> 1299 * endsWith(<js>"Hello World"</js>, <js>"World"</js>); <jc>// true</jc> 1300 * endsWith(<js>"Hello World"</js>, <js>"Hello"</js>); <jc>// false</jc> 1301 * endsWith(<jk>null</jk>, <js>"World"</js>); <jc>// false</jc> 1302 * </p> 1303 * 1304 * @param s The string to check. Can be <jk>null</jk>. 1305 * @param suffix The suffix to check for. 1306 * @return <jk>true</jk> if the string ends with the specified suffix. 1307 * @see #endsWith(String, char) 1308 * @see #endsWithAny(String, String...) 1309 * @see String#endsWith(String) 1310 */ 1311 public static boolean endsWith(String s, String suffix) { 1312 return s != null && s.endsWith(suffix); 1313 } 1314 1315 /** 1316 * Checks if a string ends with any of the specified characters. 1317 * 1318 * <p> 1319 * This is a null-safe operation. 
Returns <jk>false</jk> if: 1320 * <ul> 1321 * <li>The string is <jk>null</jk> or empty</li> 1322 * <li>The characters array is <jk>null</jk> or empty</li> 1323 * <li>The string does not end with any of the specified characters</li> 1324 * </ul> 1325 * 1326 * <h5 class='section'>Example:</h5> 1327 * <p class='bjava'> 1328 * endsWithAny(<js>"Hello"</js>, <js>'o'</js>, <js>'x'</js>); <jc>// true (ends with 'o')</jc> 1329 * endsWithAny(<js>"Hello"</js>, <js>'x'</js>, <js>'y'</js>); <jc>// false</jc> 1330 * endsWithAny(<jk>null</jk>, <js>'o'</js>); <jc>// false</jc> 1331 * </p> 1332 * 1333 * @param s The string to check. Can be <jk>null</jk>. 1334 * @param c The characters to check for. 1335 * @return <jk>true</jk> if the string ends with any of the specified characters. 1336 * @see #endsWith(String, char) 1337 * @see #endsWithAny(String, String...) 1338 */ 1339 public static boolean endsWithAny(String s, char...c) { 1340 if (nn(s)) { 1341 var i = s.length(); 1342 if (i > 0) { 1343 var c2 = s.charAt(i - 1); 1344 for (var cc : c) 1345 if (c2 == cc) 1346 return true; 1347 } 1348 } 1349 return false; 1350 } 1351 1352 /** 1353 * Checks if a string ends with any of the specified strings. 1354 * 1355 * <p> 1356 * This is a null-safe operation. Returns <jk>false</jk> if: 1357 * <ul> 1358 * <li>The string is <jk>null</jk></li> 1359 * <li>The suffixes array is <jk>null</jk> or empty</li> 1360 * <li>The string does not end with any of the specified suffixes</li> 1361 * </ul> 1362 * 1363 * <h5 class='section'>Example:</h5> 1364 * <p class='bjava'> 1365 * endsWithAny(<js>"Hello World"</js>, <js>"World"</js>, <js>"Foo"</js>); <jc>// true (ends with "World")</jc> 1366 * endsWithAny(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"Foo"</js>); <jc>// false</jc> 1367 * endsWithAny(<jk>null</jk>, <js>"World"</js>); <jc>// false</jc> 1368 * </p> 1369 * 1370 * @param s The string to check. Can be <jk>null</jk>. 1371 * @param suffixes The suffixes to check for. 
1372 * @return <jk>true</jk> if the string ends with any of the specified suffixes. 1373 * @see #endsWith(String, String) 1374 * @see #endsWithAny(String, char...) 1375 * @see String#endsWith(String) 1376 */ 1377 public static boolean endsWithAny(String s, String...suffixes) { 1378 if (s == null || suffixes == null || suffixes.length == 0) 1379 return false; 1380 for (var suffix : suffixes) { 1381 if (s.endsWith(suffix)) 1382 return true; 1383 } 1384 return false; 1385 } 1386 1387 /** 1388 * Checks if a string ends with a suffix, ignoring case. 1389 * 1390 * <h5 class='section'>Example:</h5> 1391 * <p class='bjava'> 1392 * endsWithIgnoreCase(<js>"Hello World"</js>, <js>"world"</js>); <jc>// true</jc> 1393 * endsWithIgnoreCase(<js>"Hello World"</js>, <js>"WORLD"</js>); <jc>// true</jc> 1394 * endsWithIgnoreCase(<js>"hello world"</js>, <js>"hello"</js>); <jc>// false</jc> 1395 * </p> 1396 * 1397 * @param str The string to check. 1398 * @param suffix The suffix to check for. 1399 * @return <jk>true</jk> if the string ends with the suffix (ignoring case), <jk>false</jk> otherwise. 1400 */ 1401 public static boolean endsWithIgnoreCase(String str, String suffix) { 1402 if (str == null || suffix == null) 1403 return false; 1404 return str.toLowerCase().endsWith(suffix.toLowerCase()); 1405 } 1406 1407 /** 1408 * Calculates the entropy of a string. 1409 * 1410 * <p> 1411 * Entropy measures the randomness or information content of a string. 1412 * Higher entropy indicates more randomness. The formula used is: 1413 * <c>H(X) = -Σ P(x) * log2(P(x))</c> 1414 * where P(x) is the probability of character x. 1415 * 1416 * <h5 class='section'>Example:</h5> 1417 * <p class='bjava'> 1418 * entropy(<js>"aaaa"</js>); <jc>// 0.0 (no randomness)</jc> 1419 * entropy(<js>"abcd"</js>); <jc>// 2.0 (high randomness)</jc> 1420 * entropy(<js>"hello"</js>); <jc>// ~2.32</jc> 1421 * </p> 1422 * 1423 * @param str The string to calculate entropy for. Can be <jk>null</jk>. 
1424 * @return The entropy value (0.0 or higher), or <c>0.0</c> if the string is <jk>null</jk> or empty. 1425 */ 1426 public static double entropy(String str) { 1427 if (isEmpty(str)) 1428 return 0.0; 1429 1430 var length = str.length(); 1431 1432 // Count character frequencies 1433 var charCounts = new int[Character.MAX_VALUE + 1]; 1434 for (var i = 0; i < length; i++) { 1435 charCounts[str.charAt(i)]++; 1436 } 1437 1438 // Calculate entropy 1439 var entropy = 0.0; 1440 for (var count : charCounts) { 1441 if (count > 0) { 1442 var probability = (double)count / length; 1443 entropy -= probability * (Math.log(probability) / Math.log(2.0)); 1444 } 1445 } 1446 1447 return entropy; 1448 } 1449 1450 /** 1451 * Tests two objects for case-insensitive string equality. 1452 * 1453 * <p> 1454 * Converts both objects to strings using {@link Object#toString()} before comparison. 1455 * This method handles <jk>null</jk> values gracefully: 1456 * <ul> 1457 * <li>Both <jk>null</jk> → returns <jk>true</jk></li> 1458 * <li>One <jk>null</jk> → returns <jk>false</jk></li> 1459 * <li>Neither <jk>null</jk> → compares string representations ignoring case</li> 1460 * </ul> 1461 * 1462 * <h5 class='section'>Example:</h5> 1463 * <p class='bjava'> 1464 * equalsIgnoreCase(<js>"Hello"</js>, <js>"HELLO"</js>); <jc>// true</jc> 1465 * equalsIgnoreCase(<js>"Hello"</js>, <js>"World"</js>); <jc>// false</jc> 1466 * equalsIgnoreCase(<jk>null</jk>, <jk>null</jk>); <jc>// true</jc> 1467 * equalsIgnoreCase(<js>"Hello"</js>, <jk>null</jk>); <jc>// false</jc> 1468 * equalsIgnoreCase(123, <js>"123"</js>); <jc>// true (converts 123 to "123")</jc> 1469 * </p> 1470 * 1471 * @param a Object 1. 1472 * @param b Object 2. 1473 * @return <jk>true</jk> if both objects are equal ignoring case. 
1474 * @see #equalsIgnoreCase(String, String) 1475 * @see Utils#eqic(Object, Object) 1476 */ 1477 public static boolean equalsIgnoreCase(Object a, Object b) { 1478 if (a == null && b == null) 1479 return true; 1480 if (a == null || b == null) 1481 return false; 1482 return a.toString().equalsIgnoreCase(b.toString()); 1483 } 1484 1485 /** 1486 * Tests two strings for case-insensitive equality, but gracefully handles nulls. 1487 * 1488 * <p> 1489 * This method handles <jk>null</jk> values gracefully: 1490 * <ul> 1491 * <li>Both <jk>null</jk> → returns <jk>true</jk> (same reference check)</li> 1492 * <li>One <jk>null</jk> → returns <jk>false</jk></li> 1493 * <li>Neither <jk>null</jk> → compares strings ignoring case</li> 1494 * </ul> 1495 * 1496 * <h5 class='section'>Example:</h5> 1497 * <p class='bjava'> 1498 * equalsIgnoreCase(<js>"Hello"</js>, <js>"hello"</js>); <jc>// true</jc> 1499 * equalsIgnoreCase(<js>"Hello"</js>, <js>"WORLD"</js>); <jc>// false</jc> 1500 * equalsIgnoreCase(<jk>null</jk>, <jk>null</jk>); <jc>// true</jc> 1501 * equalsIgnoreCase(<js>"Hello"</js>, <jk>null</jk>); <jc>// false</jc> 1502 * </p> 1503 * 1504 * @param str1 The first string. 1505 * @param str2 The second string. 1506 * @return <jk>true</jk> if the strings are equal ignoring case, <jk>false</jk> otherwise. 1507 * @see #equalsIgnoreCase(Object, Object) 1508 * @see Utils#eqic(String, String) 1509 */ 1510 public static boolean equalsIgnoreCase(String str1, String str2) { 1511 if (str1 == str2) 1512 return true; 1513 if (str1 == null || str2 == null) 1514 return false; 1515 return str1.equalsIgnoreCase(str2); 1516 } 1517 1518 /** 1519 * Escapes the specified characters in the string. 1520 * 1521 * @param s The string with characters to escape. 1522 * @param escaped The characters to escape. 1523 * @return The string with characters escaped, or the same string if no escapable characters were found. 
1524 */ 1525 public static String escapeChars(String s, AsciiSet escaped) { 1526 if (s == null || s.isEmpty()) 1527 return s; 1528 1529 var count = 0; 1530 for (var i = 0; i < s.length(); i++) 1531 if (escaped.contains(s.charAt(i))) 1532 count++; 1533 if (count == 0) 1534 return s; 1535 1536 var sb = new StringBuffer(s.length() + count); 1537 for (var i = 0; i < s.length(); i++) { 1538 var c = s.charAt(i); 1539 if (escaped.contains(c)) 1540 sb.append('\\'); 1541 sb.append(c); 1542 } 1543 return sb.toString(); 1544 } 1545 1546 /** 1547 * Escapes a string for safe inclusion in Java source code. 1548 * 1549 * <p>This method converts special characters to their Java escape sequences and 1550 * converts non-printable ASCII characters to Unicode escape sequences. 1551 * 1552 * <h5 class='section'>Escape mappings:</h5> 1553 * <ul> 1554 * <li>{@code "} → {@code \"}</li> 1555 * <li>{@code \} → {@code \\}</li> 1556 * <li>{@code \n} → {@code \\n}</li> 1557 * <li>{@code \r} → {@code \\r}</li> 1558 * <li>{@code \t} → {@code \\t}</li> 1559 * <li>{@code \f} → {@code \\f}</li> 1560 * <li>{@code \b} → {@code \\b}</li> 1561 * <li>Non-printable characters → {@code \\uXXXX}</li> 1562 * </ul> 1563 * 1564 * <h5 class='section'>Example:</h5> 1565 * <p class='bjava'> 1566 * <jk>var</jk> <jv>escaped</jv> = <jsm>escapeForJava</jsm>(<js>"Hello\nWorld\"Test\""</js>); 1567 * <jc>// Returns: "Hello\\nWorld\\\"Test\\\""</jc> 1568 * </p> 1569 * 1570 * @param s The string to escape. 1571 * @return The escaped string safe for Java source code, or <jk>null</jk> if input is <jk>null</jk>. 
1572 */ 1573 public static String escapeForJava(String s) { 1574 if (s == null) 1575 return null; 1576 var sb = new StringBuilder(); 1577 for (var c : s.toCharArray()) { 1578 sb.append(switch (c) { 1579 case '\"' -> "\\\""; 1580 case '\\' -> "\\\\"; 1581 case '\n' -> "\\n"; 1582 case '\r' -> "\\r"; 1583 case '\t' -> "\\t"; 1584 case '\f' -> "\\f"; 1585 case '\b' -> "\\b"; 1586 default -> { 1587 if (c < 0x20 || c > 0x7E) 1588 yield String.format("\\u%04x", (int)c); 1589 yield String.valueOf(c); 1590 } 1591 }); 1592 } 1593 return sb.toString(); 1594 } 1595 1596 /** 1597 * Escapes HTML entities in a string. 1598 * 1599 * <p> 1600 * Escapes the following characters: 1601 * <ul> 1602 * <li><js>'&'</js> → <js>"&amp;"</js></li> 1603 * <li><js>'<'</js> → <js>"&lt;"</js></li> 1604 * <li><js>'>'</js> → <js>"&gt;"</js></li> 1605 * <li><js>'"'</js> → <js>"&quot;"</js></li> 1606 * <li><js>'\''</js> → <js>"&#39;"</js></li> 1607 * </ul> 1608 * 1609 * <h5 class='section'>Example:</h5> 1610 * <p class='bjava'> 1611 * escapeHtml(<js>"<script>alert('xss')</script>"</js>); 1612 * <jc>// Returns: "&lt;script&gt;alert(&#39;xss&#39;)&lt;/script&gt;"</jc> 1613 * </p> 1614 * 1615 * @param str The string to escape. 1616 * @return The escaped string, or <jk>null</jk> if input is <jk>null</jk>. 1617 */ 1618 public static String escapeHtml(String str) { 1619 if (str == null) 1620 return null; 1621 var sb = new StringBuilder(str.length() * 2); 1622 for (var i = 0; i < str.length(); i++) { 1623 var c = str.charAt(i); 1624 switch (c) { 1625 case '&' -> sb.append("&"); 1626 case '<' -> sb.append("<"); 1627 case '>' -> sb.append(">"); 1628 case '"' -> sb.append("""); 1629 case '\'' -> sb.append("'"); 1630 default -> sb.append(c); 1631 } 1632 } 1633 return sb.toString(); 1634 } 1635 1636 /** 1637 * Escapes regex special characters in a string. 
1638 * 1639 * <p> 1640 * Escapes the following regex special characters: <js>\.*+?^${}()[]|\\</js> 1641 * 1642 * <h5 class='section'>Example:</h5> 1643 * <p class='bjava'> 1644 * escapeRegex(<js>"file.txt"</js>); <jc>// Returns: "file\\.txt"</jc> 1645 * escapeRegex(<js>"price: $10.99"</js>); <jc>// Returns: "price: \\$10\\.99"</jc> 1646 * </p> 1647 * 1648 * @param str The string to escape. 1649 * @return The escaped string, or <jk>null</jk> if input is <jk>null</jk>. 1650 */ 1651 public static String escapeRegex(String str) { 1652 if (str == null) 1653 return null; 1654 // Escape regex special characters: . * + ? ^ $ { } ( ) [ ] | \ 1655 return str.replace("\\", "\\\\").replace(".", "\\.").replace("*", "\\*").replace("+", "\\+").replace("?", "\\?").replace("^", "\\^").replace("$", "\\$").replace("{", "\\{").replace("}", "\\}") 1656 .replace("(", "\\(").replace(")", "\\)").replace("[", "\\[").replace("]", "\\]").replace("|", "\\|"); 1657 } 1658 1659 /** 1660 * Escapes SQL string literals by doubling single quotes. 1661 * 1662 * <p> 1663 * Basic SQL escaping for string literals. Escapes single quotes by doubling them. 1664 * This is a basic implementation - for production use, consider using prepared statements. 1665 * 1666 * <h5 class='section'>Example:</h5> 1667 * <p class='bjava'> 1668 * escapeSql(<js>"O'Brien"</js>); <jc>// Returns: "O''Brien"</jc> 1669 * escapeSql(<js>"It's a test"</js>); <jc>// Returns: "It''s a test"</jc> 1670 * </p> 1671 * 1672 * @param str The string to escape. 1673 * @return The escaped string, or <jk>null</jk> if input is <jk>null</jk>. 1674 */ 1675 public static String escapeSql(String str) { 1676 if (str == null) 1677 return null; 1678 return str.replace("'", "''"); 1679 } 1680 1681 /** 1682 * Escapes XML entities in a string. 
1683 * 1684 * <p> 1685 * Escapes the following characters: 1686 * <ul> 1687 * <li><js>'&'</js> → <js>"&amp;"</js></li> 1688 * <li><js>'<'</js> → <js>"&lt;"</js></li> 1689 * <li><js>'>'</js> → <js>"&gt;"</js></li> 1690 * <li><js>'"'</js> → <js>"&quot;"</js></li> 1691 * <li><js>'\''</js> → <js>"&apos;"</js></li> 1692 * </ul> 1693 * 1694 * <h5 class='section'>Example:</h5> 1695 * <p class='bjava'> 1696 * escapeXml(<js>"<tag attr='value'>text</tag>"</js>); 1697 * <jc>// Returns: "&lt;tag attr=&apos;value&apos;&gt;text&lt;/tag&gt;"</jc> 1698 * </p> 1699 * 1700 * @param str The string to escape. 1701 * @return The escaped string, or <jk>null</jk> if input is <jk>null</jk>. 1702 */ 1703 public static String escapeXml(String str) { 1704 if (str == null) 1705 return null; 1706 var sb = new StringBuilder(str.length() * 2); 1707 for (var i = 0; i < str.length(); i++) { 1708 var c = str.charAt(i); 1709 switch (c) { 1710 case '&' -> sb.append("&"); 1711 case '<' -> sb.append("<"); 1712 case '>' -> sb.append(">"); 1713 case '"' -> sb.append("""); 1714 case '\'' -> sb.append("'"); 1715 default -> sb.append(c); 1716 } 1717 } 1718 return sb.toString(); 1719 } 1720 1721 /** 1722 * Extracts all text segments between start and end markers. 1723 * 1724 * <p> 1725 * Finds all occurrences of text between the start and end markers (non-overlapping). 1726 * 1727 * <h5 class='section'>Example:</h5> 1728 * <p class='bjava'> 1729 * extractBetween(<js>"<tag>content</tag>"</js>, <js>"<"</js>, <js>">"</js>); 1730 * <jc>// ["tag", "/tag"]</jc> 1731 * extractBetween(<js>"[one][two][three]"</js>, <js>"["</js>, <js>"]"</js>); 1732 * <jc>// ["one", "two", "three"]</jc> 1733 * </p> 1734 * 1735 * @param str The string to extract from. Can be <jk>null</jk>. 1736 * @param start The start marker. Can be <jk>null</jk>. 1737 * @param end The end marker. Can be <jk>null</jk>. 1738 * @return A list of text segments found between the markers, or an empty list if any parameter is <jk>null</jk> or empty. 
1739 */ 1740 public static List<String> extractBetween(String str, String start, String end) { 1741 if (isEmpty(str) || isEmpty(start) || isEmpty(end)) 1742 return Collections.emptyList(); 1743 1744 var result = new ArrayList<String>(); 1745 var startIndex = 0; 1746 while (true) { 1747 var startPos = str.indexOf(start, startIndex); 1748 if (startPos == -1) 1749 break; 1750 var endPos = str.indexOf(end, startPos + start.length()); 1751 if (endPos == -1) 1752 break; 1753 result.add(str.substring(startPos + start.length(), endPos)); 1754 startIndex = endPos + end.length(); 1755 } 1756 return result; 1757 } 1758 1759 /** 1760 * Extracts all email addresses from a string. 1761 * 1762 * <p> 1763 * Uses a basic email regex pattern to find email addresses. 1764 * 1765 * <h5 class='section'>Example:</h5> 1766 * <p class='bjava'> 1767 * extractEmails(<js>"Contact: user@example.com or admin@test.org"</js>); 1768 * <jc>// ["user@example.com", "admin@test.org"]</jc> 1769 * </p> 1770 * 1771 * @param str The string to extract emails from. Can be <jk>null</jk>. 1772 * @return A list of email addresses found in the input, or an empty list if the string is <jk>null</jk> or empty. 1773 */ 1774 public static List<String> extractEmails(String str) { 1775 if (isEmpty(str)) 1776 return Collections.emptyList(); 1777 1778 var result = new ArrayList<String>(); 1779 // Email regex pattern (same as isEmail but without ^ and $ anchors) 1780 var pattern = Pattern.compile("[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}"); 1781 var matcher = pattern.matcher(str); 1782 while (matcher.find()) { 1783 result.add(matcher.group()); 1784 } 1785 return result; 1786 } 1787 1788 /** 1789 * Extracts all numeric sequences from a string. 1790 * 1791 * <p> 1792 * Finds all sequences of digits (including decimal numbers with dots). 
1793 * 1794 * <h5 class='section'>Example:</h5> 1795 * <p class='bjava'> 1796 * extractNumbers(<js>"Price: $19.99, Quantity: 5"</js>); 1797 * <jc>// ["19.99", "5"]</jc> 1798 * extractNumbers(<js>"Version 1.2.3"</js>); 1799 * <jc>// ["1.2", "3"]</jc> 1800 * </p> 1801 * 1802 * @param str The string to extract numbers from. Can be <jk>null</jk>. 1803 * @return A list of numeric strings found in the input, or an empty list if the string is <jk>null</jk> or empty. 1804 */ 1805 public static List<String> extractNumbers(String str) { 1806 if (isEmpty(str)) 1807 return Collections.emptyList(); 1808 1809 var result = new ArrayList<String>(); 1810 var pattern = Pattern.compile("\\d+(?:\\.\\d+)?"); 1811 var matcher = pattern.matcher(str); 1812 while (matcher.find()) { 1813 result.add(matcher.group()); 1814 } 1815 return result; 1816 } 1817 1818 /** 1819 * Extracts all URLs from a string. 1820 * 1821 * <p> 1822 * Uses a basic URL regex pattern to find URLs (http, https, ftp). 1823 * 1824 * <h5 class='section'>Example:</h5> 1825 * <p class='bjava'> 1826 * extractUrls(<js>"Visit https://example.com or http://test.org"</js>); 1827 * <jc>// ["https://example.com", "http://test.org"]</jc> 1828 * </p> 1829 * 1830 * @param str The string to extract URLs from. Can be <jk>null</jk>. 1831 * @return A list of URLs found in the input, or an empty list if the string is <jk>null</jk> or empty. 1832 */ 1833 public static List<String> extractUrls(String str) { 1834 if (isEmpty(str)) 1835 return Collections.emptyList(); 1836 1837 var result = new ArrayList<String>(); 1838 // Basic URL pattern: protocol://domain/path 1839 var pattern = Pattern.compile("(?:https?|ftp)://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+", Pattern.CASE_INSENSITIVE); 1840 var matcher = pattern.matcher(str); 1841 while (matcher.find()) { 1842 result.add(matcher.group()); 1843 } 1844 return result; 1845 } 1846 1847 /** 1848 * Extracts all words from a string. 
1849 * 1850 * <p> 1851 * A word is defined as a sequence of letters, digits, and underscores. 1852 * 1853 * <h5 class='section'>Example:</h5> 1854 * <p class='bjava'> 1855 * extractWords(<js>"Hello world! This is a test."</js>); 1856 * <jc>// ["Hello", "world", "This", "is", "a", "test"]</jc> 1857 * </p> 1858 * 1859 * @param str The string to extract words from. Can be <jk>null</jk>. 1860 * @return A list of words found in the input, or an empty list if the string is <jk>null</jk> or empty. 1861 */ 1862 public static List<String> extractWords(String str) { 1863 if (isEmpty(str)) 1864 return Collections.emptyList(); 1865 1866 var result = new ArrayList<String>(); 1867 // Word pattern: sequence of word characters (letters, digits, underscore) 1868 var pattern = Pattern.compile("\\w+"); 1869 var matcher = pattern.matcher(str); 1870 while (matcher.find()) { 1871 result.add(matcher.group()); 1872 } 1873 return result; 1874 } 1875 1876 /** 1877 * Filters a string array using the specified predicate. 1878 * 1879 * <p> 1880 * Returns <jk>null</jk> if the array is <jk>null</jk>. 1881 * Returns an empty array if the predicate is <jk>null</jk> or no elements match. 1882 * 1883 * <h5 class='section'>Examples:</h5> 1884 * <p class='bjava'> 1885 * String[] <jv>array</jv> = {<js>"foo"</js>, <js>""</js>, <js>"bar"</js>, <jk>null</jk>, <js>"baz"</js>}; 1886 * String[] <jv>filtered</jv> = filter(<jv>array</jv>, StringUtils.<jsf>NOT_EMPTY</jsf>); 1887 * <jc>// Returns: ["foo", "bar", "baz"]</jc> 1888 * 1889 * String[] <jv>longStrings</jv> = filter(<jv>array</jv>, s -> s != <jk>null</jk> && s.length() > 3); 1890 * <jc>// Returns: ["baz"]</jc> 1891 * </p> 1892 * 1893 * @param array The array to filter. Can be <jk>null</jk>. 1894 * @param predicate The predicate to apply to each element. Can be <jk>null</jk>. 1895 * @return A new array containing only the elements that match the predicate, or <jk>null</jk> if the array was <jk>null</jk>. 
1896 */ 1897 public static String[] filter(String[] array, Predicate<String> predicate) { 1898 if (array == null) 1899 return null; // NOSONAR - Intentional. 1900 if (predicate == null) 1901 return new String[0]; 1902 return Arrays.stream(array).filter(predicate).toArray(String[]::new); 1903 } 1904 1905 /** 1906 * Returns the first character in the specified string. 1907 * 1908 * <p> 1909 * This is a null-safe and bounds-safe operation. Returns <c>0</c> (null character) if: 1910 * <ul> 1911 * <li>The string is <jk>null</jk></li> 1912 * <li>The string is empty</li> 1913 * </ul> 1914 * 1915 * <h5 class='section'>Example:</h5> 1916 * <p class='bjava'> 1917 * firstChar(<js>"Hello"</js>); <jc>// 'H'</jc> 1918 * firstChar(<js>"World"</js>); <jc>// 'W'</jc> 1919 * firstChar(<js>""</js>); <jc>// 0 (empty string)</jc> 1920 * firstChar(<jk>null</jk>); <jc>// 0 (null string)</jc> 1921 * </p> 1922 * 1923 * @param s The string to check. 1924 * @return The first character in the string, or <c>0</c> if the string is <jk>null</jk> or empty. 1925 * @see #charAt(String, int) 1926 * @see #firstNonWhitespaceChar(String) 1927 */ 1928 public static char firstChar(String s) { 1929 if (s == null || s.isEmpty()) 1930 return 0; 1931 return s.charAt(0); 1932 } 1933 1934 /** 1935 * Returns the first non-blank string in the array. 1936 * 1937 * @param vals The strings to check. 1938 * @return The first non-blank string, or <jk>null</jk> if all values were blank or <jk>null</jk>. 1939 */ 1940 public static String firstNonBlank(String...vals) { 1941 for (var v : vals) { 1942 if (isNotBlank(v)) 1943 return v; 1944 } 1945 return null; 1946 } 1947 1948 /** 1949 * Returns the first non-null, non-empty string in the list. 1950 * 1951 * <p> 1952 * This method iterates through the provided strings and returns the first one that is not <jk>null</jk> 1953 * and not empty (as determined by {@link Utils#ne(CharSequence)}). 
1954 * 1955 * <h5 class='section'>Example:</h5> 1956 * <p class='bjava'> 1957 * firstNonEmpty(<jk>null</jk>, <js>""</js>, <js>"Hello"</js>, <js>"World"</js>); <jc>// "Hello"</jc> 1958 * firstNonEmpty(<js>"Hello"</js>, <js>"World"</js>); <jc>// "Hello"</jc> 1959 * firstNonEmpty(<jk>null</jk>, <js>""</js>); <jc>// null</jc> 1960 * firstNonEmpty(); <jc>// null</jc> 1961 * </p> 1962 * 1963 * @param s The strings to test. 1964 * @return The first non-empty string in the list, or <jk>null</jk> if they were all <jk>null</jk> or empty. 1965 * @see #firstNonBlank(String...) 1966 * @see Utils#ne(CharSequence) 1967 */ 1968 public static String firstNonEmpty(String...s) { 1969 for (var ss : s) 1970 if (ne(ss)) 1971 return ss; 1972 return null; 1973 } 1974 1975 /** 1976 * Returns the first non-whitespace character in the string. 1977 * 1978 * <p> 1979 * This method scans the string from the beginning and returns the first character that is not 1980 * a whitespace character (as determined by {@link Character#isWhitespace(char)}). 1981 * 1982 * <h5 class='section'>Example:</h5> 1983 * <p class='bjava'> 1984 * firstNonWhitespaceChar(<js>"Hello"</js>); <jc>// 'H'</jc> 1985 * firstNonWhitespaceChar(<js>" Hello"</js>); <jc>// 'H'</jc> 1986 * firstNonWhitespaceChar(<js>"\t\nWorld"</js>); <jc>// 'W'</jc> 1987 * firstNonWhitespaceChar(<js>" "</js>); <jc>// 0 (only whitespace)</jc> 1988 * firstNonWhitespaceChar(<jk>null</jk>); <jc>// 0 (null string)</jc> 1989 * </p> 1990 * 1991 * @param s The string to check. 1992 * @return The first non-whitespace character, or <c>0</c> if the string is <jk>null</jk>, empty, or composed of only whitespace. 1993 * @see #firstChar(String) 1994 * @see Character#isWhitespace(char) 1995 */ 1996 public static char firstNonWhitespaceChar(String s) { 1997 if (nn(s)) 1998 for (var i = 0; i < s.length(); i++) 1999 if (! isWhitespace(s.charAt(i))) 2000 return s.charAt(i); 2001 return 0; 2002 } 2003 2004 /** 2005 * URL-encodes invalid characters in a URI string. 
2006 * 2007 * <p> 2008 * This method escapes characters that are not valid in URIs by converting them to percent-encoded 2009 * format. Spaces are converted to <js>"+"</js> characters, and other invalid characters are 2010 * percent-encoded (e.g., <js>"hello world"</js> becomes <js>"hello+world"</js>). 2011 * 2012 * <p> 2013 * Only ASCII characters (0-127) that are not in the valid URI character set are encoded. 2014 * If the string contains no invalid characters, the original string is returned. 2015 * 2016 * <h5 class='section'>Example:</h5> 2017 * <p class='bjava'> 2018 * fixUrl(<js>"hello world"</js>); <jc>// "hello+world"</jc> 2019 * fixUrl(<js>"file://path/to file.txt"</js>); <jc>// "file://path/to+file.txt"</jc> 2020 * fixUrl(<js>"valid-url"</js>); <jc>// "valid-url" (no change)</jc> 2021 * fixUrl(<jk>null</jk>); <jc>// null</jc> 2022 * </p> 2023 * 2024 * @param in The URI string to encode. Can be <jk>null</jk>. 2025 * @return The URI with invalid characters encoded, or <jk>null</jk> if input is <jk>null</jk>. 2026 * @see #urlEncode(String) 2027 */ 2028 @SuppressWarnings("null") 2029 public static String fixUrl(String in) { 2030 2031 if (in == null) 2032 return null; 2033 2034 var sb = (StringBuilder)null; 2035 2036 var m = 0; 2037 2038 for (var i = 0; i < in.length(); i++) { 2039 var c = in.charAt(i); 2040 if (! URI_CHARS.contains(c)) { 2041 sb = append(sb, in.substring(m, i)); 2042 if (c == ' ') 2043 sb.append("+"); 2044 else 2045 sb.append('%').append(toHex2(c)); 2046 m = i + 1; 2047 } 2048 } 2049 if (nn(sb)) { 2050 sb.append(in.substring(m)); 2051 return sb.toString(); 2052 } 2053 return in; 2054 2055 } 2056 2057 /** 2058 * Formats a string using printf-style and/or MessageFormat-style format specifiers. 
/**
 * Formats a string using printf-style and/or MessageFormat-style format specifiers.
 *
 * <p>
 * This method provides unified string formatting that supports both printf-style formatting
 * (similar to C's <c>printf()</c> function and Java's {@link String#format(String, Object...)})
 * and MessageFormat-style formatting in the same pattern.
 * All work is delegated to {@link StringFormat#format(String, Object...)}.
 *
 * <h5 class='section'>Format Support:</h5>
 * <ul>
 * 	<li><b>Printf-style:</b> <js>"%s"</js>, <js>"%d"</js>, <js>"%.2f"</js>, <js>"%1$s"</js>, etc.</li>
 * 	<li><b>MessageFormat-style:</b> <js>"{0}"</js>, <js>"{1,number}"</js>, <js>"{2,date}"</js>, etc.</li>
 * 	<li><b>Un-numbered MessageFormat:</b> <js>"{}"</js> - Sequential placeholders that are automatically numbered</li>
 * 	<li><b>Mixed formats:</b> Both styles can be used in the same pattern</li>
 * </ul>
 *
 * <h5 class='section'>Printf Format Specifiers:</h5>
 * <ul>
 * 	<li><b>%s</b> - String</li>
 * 	<li><b>%d</b> - Decimal integer</li>
 * 	<li><b>%f</b> - Floating point</li>
 * 	<li><b>%x</b> - Hexadecimal (lowercase)</li>
 * 	<li><b>%X</b> - Hexadecimal (uppercase)</li>
 * 	<li><b>%o</b> - Octal</li>
 * 	<li><b>%b</b> - Boolean</li>
 * 	<li><b>%c</b> - Character</li>
 * 	<li><b>%e</b> - Scientific notation (lowercase)</li>
 * 	<li><b>%E</b> - Scientific notation (uppercase)</li>
 * 	<li><b>%g</b> - General format (lowercase)</li>
 * 	<li><b>%G</b> - General format (uppercase)</li>
 * 	<li><b>%n</b> - Platform-specific line separator</li>
 * 	<li><b>%%</b> - Literal percent sign</li>
 * </ul>
 *
 * <h5 class='section'>Format Specifier Syntax:</h5>
 * <p>
 * Printf format specifiers follow this pattern: <c>%[argument_index$][flags][width][.precision]conversion</c>
 * </p>
 * <p>
 * MessageFormat placeholders follow this pattern: <c>{argument_index[,format_type[,format_style]]}</c>
 * </p>
 *
 * <h5 class='section'>Examples:</h5>
 * <p class='bjava'>
 * 	<jc>// Printf-style formatting</jc>
 * 	format(<js>"Hello %s, you have %d items"</js>, <js>"John"</js>, 5);
 * 	<jc>// Returns: "Hello John, you have 5 items"</jc>
 *
 * 	<jc>// Floating point with precision</jc>
 * 	format(<js>"Price: $%.2f"</js>, 19.99);
 * 	<jc>// Returns: "Price: $19.99"</jc>
 *
 * 	<jc>// MessageFormat-style formatting</jc>
 * 	format(<js>"Hello {0}, you have {1} items"</js>, <js>"John"</js>, 5);
 * 	<jc>// Returns: "Hello John, you have 5 items"</jc>
 *
 * 	<jc>// Un-numbered MessageFormat placeholders (sequential)</jc>
 * 	format(<js>"Hello {}, you have {} items"</js>, <js>"John"</js>, 5);
 * 	<jc>// Returns: "Hello John, you have 5 items"</jc>
 *
 * 	<jc>// Mixed format styles in the same pattern</jc>
 * 	format(<js>"User {0} has %d items and %s status"</js>, <js>"Alice"</js>, 10, <js>"active"</js>);
 * 	<jc>// Returns: "User Alice has 10 items and active status"</jc>
 *
 * 	<jc>// Width and alignment (printf): %-20s left-justifies in 20 columns, %3d right-justifies in 3</jc>
 * 	format(<js>"Name: %-20s Age: %3d"</js>, <js>"John"</js>, 25);
 * 	<jc>// Returns: "Name: John                 Age:  25"</jc>
 *
 * 	<jc>// Hexadecimal (printf)</jc>
 * 	format(<js>"Color: #%06X"</js>, 0xFF5733);
 * 	<jc>// Returns: "Color: #FF5733"</jc>
 *
 * 	<jc>// Argument index (reuse arguments)</jc>
 * 	format(<js>"%1$s loves %2$s, and {0} also loves %3$s"</js>, <js>"Alice"</js>, <js>"Bob"</js>, <js>"Charlie"</js>);
 * 	<jc>// Returns: "Alice loves Bob, and Alice also loves Charlie"</jc>
 * </p>
 *
 * <h5 class='section'>Comparison with mformat():</h5>
 * <p>
 * This method supports both MessageFormat-style and printf-style formats.
 * </p>
 * <p class='bjava'>
 * 	<jc>// Both styles supported (this method)</jc>
 * 	format(<js>"Hello %s, you have %d items"</js>, <js>"John"</js>, 5);
 * 	format(<js>"Hello {0}, you have {1} items"</js>, <js>"John"</js>, 5);
 * 	format(<js>"User {0} has %d items"</js>, <js>"Alice"</js>, 10);
 *
 * 	<jc>// MessageFormat style only</jc>
 * 	mformat(<js>"Hello {0}, you have {1} items"</js>, <js>"John"</js>, 5);
 * </p>
 *
 * <h5 class='section'>Null Handling:</h5>
 * <p>
 * Null arguments are formatted as the string <js>"null"</js> for string conversions,
 * or cause a {@link NullPointerException} for numeric conversions (consistent with {@link String#format(String, Object...)}).
 * </p>
 *
 * @param pattern The format string supporting both MessageFormat and printf-style placeholders.
 * @param args The arguments to format.
 * @return The formatted string.
 * @throws java.util.IllegalFormatException If the format string is invalid or arguments don't match the format specifiers.
 * @see StringFormat for detailed format specification
 * @see String#format(String, Object...)
 */
public static String format(String pattern, Object...args) {
	return StringFormat.format(pattern, args);
}
2176 * 2177 * <p> 2178 * Nested variables are supported in both the input string and map values. 2179 * 2180 * <p> 2181 * If the map does not contain the specified value, the variable is not replaced. 2182 * 2183 * <p> 2184 * <jk>null</jk> values in the map are treated as blank strings. 2185 * 2186 * @param s The string containing variables to replace. 2187 * @param m The map containing the variable values. 2188 * @return The new string with variables replaced, or the original string if it didn't have variables in it. 2189 */ 2190 public static String formatNamed(String s, Map<String,Object> m) { 2191 2192 if (s == null) 2193 return null; 2194 2195 if (m == null || m.isEmpty() || s.indexOf('{') == -1) 2196 return s; 2197 2198 // S1: Not in variable, looking for '{' 2199 // S2: Found '{', Looking for '}' 2200 2201 var state = S1; 2202 var hasInternalVar = false; 2203 var x = 0; 2204 var depth = 0; 2205 var length = s.length(); 2206 var out = new StringBuilder(); 2207 2208 for (var i = 0; i < length; i++) { 2209 var c = s.charAt(i); 2210 if (state == S1) { 2211 if (c == '{') { 2212 state = S2; 2213 x = i; 2214 } else { 2215 out.append(c); 2216 } 2217 } else /* state == S2 */ { 2218 if (c == '{') { 2219 depth++; 2220 hasInternalVar = true; 2221 } else if (c == '}') { 2222 if (depth > 0) { 2223 depth--; 2224 } else { 2225 var key = s.substring(x + 1, i); 2226 key = (hasInternalVar ? formatNamed(key, m) : key); 2227 hasInternalVar = false; 2228 // JUNEAU-248: Check if key exists in map by attempting to get it 2229 // For regular maps: use containsKey() OR nn(get()) check 2230 // For BeanMaps: get() returns non-null for accessible properties (including hidden ones) 2231 var val = m.get(key); 2232 // Check if key actually exists: either containsKey is true, or val is non-null 2233 // This handles both regular maps and BeanMaps correctly 2234 var keyExists = m.containsKey(key) || nn(val); 2235 if (! 
keyExists) 2236 out.append('{').append(key).append('}'); 2237 else { 2238 if (val == null) 2239 val = ""; 2240 var v = r(val); 2241 // If the replacement also contains variables, replace them now. 2242 if (v.indexOf('{') != -1) 2243 v = formatNamed(v, m); 2244 out.append(v); 2245 } 2246 state = S1; 2247 } 2248 } 2249 } 2250 } 2251 return out.toString(); 2252 } 2253 2254 /** 2255 * Converts a hexadecimal character string to a byte array. 2256 * 2257 * @param hex The string to convert to a byte array. 2258 * @return A new byte array. 2259 */ 2260 public static byte[] fromHex(String hex) { 2261 var buff = ByteBuffer.allocate(hex.length() / 2); 2262 for (var i = 0; i < hex.length(); i += 2) 2263 buff.put((byte)Integer.parseInt(hex.substring(i, i + 2), 16)); 2264 buff.rewind(); 2265 return buff.array(); 2266 } 2267 2268 /** 2269 * Converts a hexadecimal byte stream (e.g. "34A5BC") into a UTF-8 encoded string. 2270 * 2271 * @param hex The hexadecimal string. 2272 * @return The UTF-8 string. 2273 */ 2274 public static String fromHexToUTF8(String hex) { 2275 var buff = ByteBuffer.allocate(hex.length() / 2); 2276 for (var i = 0; i < hex.length(); i += 2) 2277 buff.put((byte)Integer.parseInt(hex.substring(i, i + 2), 16)); 2278 buff.rewind(); // Fixes Java 11 issue. 2279 return UTF_8.decode(buff).toString(); 2280 } 2281 2282 /** 2283 * Same as {@link #fromHex(String)} except expects spaces between the byte strings. 2284 * 2285 * @param hex The string to convert to a byte array. 2286 * @return A new byte array. 2287 */ 2288 public static byte[] fromSpacedHex(String hex) { 2289 var buff = ByteBuffer.allocate((hex.length() + 1) / 3); 2290 for (var i = 0; i < hex.length(); i += 3) 2291 buff.put((byte)Integer.parseInt(hex.substring(i, i + 2), 16)); 2292 buff.rewind(); 2293 return buff.array(); 2294 } 2295 2296 /** 2297 * Converts a space-deliminted hexadecimal byte stream (e.g. "34 A5 BC") into a UTF-8 encoded string. 2298 * 2299 * @param hex The hexadecimal string. 
2300 * @return The UTF-8 string. 2301 */ 2302 public static String fromSpacedHexToUTF8(String hex) { 2303 var buff = ByteBuffer.allocate((hex.length() + 1) / 3); 2304 for (var i = 0; i < hex.length(); i += 3) 2305 buff.put((byte)Integer.parseInt(hex.substring(i, i + 2), 16)); 2306 buff.rewind(); // Fixes Java 11 issue. 2307 return UTF_8.decode(buff).toString(); 2308 } 2309 2310 /** 2311 * Generates a random UUID string in standard format. 2312 * 2313 * <p> 2314 * Returns a UUID in the format: <c>xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx</c> 2315 * 2316 * <h5 class='section'>Example:</h5> 2317 * <p class='bjava'> 2318 * generateUUID(); <jc>// "550e8400-e29b-41d4-a716-446655440000"</jc> 2319 * </p> 2320 * 2321 * @return A new random UUID string. 2322 */ 2323 public static String generateUUID() { 2324 return UUID.randomUUID().toString(); 2325 } 2326 2327 /** 2328 * Given an absolute URI, returns just the authority portion (e.g. <js>"http://hostname:port"</js>) 2329 * 2330 * @param s The URI string. 2331 * @return Just the authority portion of the URI. 2332 */ 2333 public static String getAuthorityUri(String s) { // NOSONAR - False positive. 2334 2335 // Use a state machine for maximum performance. 2336 2337 // S1: Looking for http 2338 // S2: Found http, looking for : 2339 // S3: Found :, looking for / 2340 // S4: Found /, looking for / 2341 // S5: Found /, looking for x 2342 // S6: Found x, looking for / 2343 2344 var state = S1; 2345 2346 for (var i = 0; i < s.length(); i++) { 2347 var c = s.charAt(i); 2348 if (state == S1) { 2349 if (isLowerCaseLetter(c)) 2350 state = S2; 2351 else 2352 return s; 2353 } else if (state == S2) { 2354 if (c == ':') 2355 state = S3; 2356 else if (! isLowerCaseLetter(c)) 2357 return s; 2358 } else if (state == S3) { // NOSONAR - False positive. 
2359 if (c == '/') 2360 state = S4; 2361 else 2362 return s; 2363 } else if (state == S4) { 2364 if (c == '/') 2365 state = S5; 2366 else 2367 return s; 2368 } else if (state == S5) { 2369 if (c != '/') 2370 state = S6; 2371 else 2372 return s; 2373 } else /* state == S6 */ { 2374 if (c == '/') // NOSONAR - Intentional. 2375 return s.substring(0, i); 2376 } 2377 } 2378 2379 return s; 2380 } 2381 2382 /** 2383 * Parses a duration string. 2384 * 2385 * <p> 2386 * Supports simple duration formats: 2387 * 2388 * <h5 class='section'>Format Examples:</h5> 2389 * <ul> 2390 * <li><js>"1000"</js> - 1000 milliseconds (no suffix) 2391 * <li><js>"10s"</js> - 10 seconds 2392 * <li><js>"10 sec"</js> - 10 seconds 2393 * <li><js>"10 seconds"</js> - 10 seconds 2394 * <li><js>"1.5h"</js> - 1.5 hours (5400000 ms) 2395 * <li><js>"1h30m"</js> - 1 hour 30 minutes (5400000 ms) 2396 * <li><js>"1h 30m"</js> - 1 hour 30 minutes (with spaces) 2397 * </ul> 2398 * 2399 * <h5 class='section'>Supported Units:</h5> 2400 * <ul> 2401 * <li><b>Milliseconds:</b> <js>"ms"</js>, <js>"millis"</js>, <js>"milliseconds"</js> (or no suffix) 2402 * <li><b>Seconds:</b> <js>"s"</js>, <js>"sec"</js>, <js>"second"</js>, <js>"seconds"</js> 2403 * <li><b>Minutes:</b> <js>"m"</js>, <js>"min"</js>, <js>"minute"</js>, <js>"minutes"</js> 2404 * <li><b>Hours:</b> <js>"h"</js>, <js>"hour"</js>, <js>"hours"</js> 2405 * <li><b>Days:</b> <js>"d"</js>, <js>"day"</js>, <js>"days"</js> 2406 * <li><b>Weeks:</b> <js>"w"</js>, <js>"week"</js>, <js>"weeks"</js> 2407 * <li><b>Months:</b> <js>"mo"</js>, <js>"month"</js>, <js>"months"</js> (30 days) 2408 * <li><b>Years:</b> <js>"y"</js>, <js>"yr"</js>, <js>"year"</js>, <js>"years"</js> (365 days) 2409 * </ul> 2410 * 2411 * <p> 2412 * Suffixes are case-insensitive. 2413 * <br>Whitespace is ignored. 2414 * <br>Decimal values are supported (e.g., <js>"1.5h"</js>). 2415 * <br>Combined formats are supported (e.g., <js>"1h30m"</js>). 2416 * 2417 * @param s The string to parse. 
2418 * @return 2419 * The time in milliseconds, or <c>-1</c> if the string is empty or <jk>null</jk>. 2420 */ 2421 public static long getDuration(String s) { 2422 s = trim(s); 2423 if (isEmpty(s)) 2424 return -1; 2425 2426 // Parse simple format (number + unit or combined format) 2427 var totalMs = 0L; 2428 var i = 0; 2429 var len = s.length(); 2430 2431 while (i < len) { 2432 // Skip whitespace 2433 while (i < len && Character.isWhitespace(s.charAt(i))) 2434 i++; 2435 2436 // Parse number (including decimal) 2437 var numStart = i; 2438 var hasDecimal = false; 2439 while (i < len) { 2440 var c = s.charAt(i); 2441 if (c >= '0' && c <= '9') { 2442 i++; 2443 } else if (c == '.' && !hasDecimal) { 2444 hasDecimal = true; 2445 i++; 2446 } else { 2447 break; 2448 } 2449 } 2450 2451 if (i == numStart) { 2452 // No number found, invalid format 2453 return -1; 2454 } 2455 2456 var numStr = s.substring(numStart, i).trim(); 2457 var value = Double.parseDouble(numStr); 2458 2459 // Skip whitespace 2460 while (i < len && Character.isWhitespace(s.charAt(i))) 2461 i++; 2462 2463 // Parse unit (read all letters until we hit a digit or whitespace) 2464 var unitStart = i; 2465 while (i < len && LETTER.contains(s.charAt(i))) 2466 i++; 2467 var unit = s.substring(unitStart, i).trim().toLowerCase(); 2468 2469 // Convert to milliseconds 2470 var ms = parseUnit(unit, value); 2471 if (ms < 0) 2472 return -1; 2473 totalMs += ms; 2474 } 2475 2476 return totalMs; 2477 } 2478 2479 /** 2480 * Converts a string containing glob-style wildcard characters to a regular expression {@link java.util.regex.Pattern}. 
2481 * 2482 * <p>This method converts glob-style patterns to regular expressions with the following mappings: 2483 * <ul> 2484 * <li>{@code *} matches any sequence of characters (including none)</li> 2485 * <li>{@code ?} matches exactly one character</li> 2486 * <li>All other characters are treated literally</li> 2487 * </ul> 2488 * 2489 * <h5 class='section'>Example:</h5> 2490 * <p class='bjava'> 2491 * <jk>var</jk> <jv>pattern</jv> = <jsm>getGlobMatchPattern</jsm>(<js>"user_*_temp"</js>); 2492 * <jk>boolean</jk> <jv>matches</jv> = <jv>pattern</jv>.matcher(<js>"user_alice_temp"</js>).matches(); <jc>// true</jc> 2493 * <jv>matches</jv> = <jv>pattern</jv>.matcher(<js>"user_bob_temp"</js>).matches(); <jc>// true</jc> 2494 * <jv>matches</jv> = <jv>pattern</jv>.matcher(<js>"admin_alice_temp"</js>).matches(); <jc>// false</jc> 2495 * </p> 2496 * 2497 * @param s The glob-style wildcard pattern string. 2498 * @return A compiled {@link java.util.regex.Pattern} object, or <jk>null</jk> if the input string is <jk>null</jk>. 2499 */ 2500 public static java.util.regex.Pattern getGlobMatchPattern(String s) { 2501 return getGlobMatchPattern(s, 0); 2502 } 2503 2504 /** 2505 * Converts a string containing glob-style wildcard characters to a regular expression {@link java.util.regex.Pattern} with flags. 
2506 * 2507 * <p>This method converts glob-style patterns to regular expressions with the following mappings: 2508 * <ul> 2509 * <li>{@code *} matches any sequence of characters (including none)</li> 2510 * <li>{@code ?} matches exactly one character</li> 2511 * <li>All other characters are treated literally</li> 2512 * </ul> 2513 * 2514 * <h5 class='section'>Example:</h5> 2515 * <p class='bjava'> 2516 * <jc>// Case-insensitive matching</jc> 2517 * <jk>var</jk> <jv>pattern</jv> = <jsm>getGlobMatchPattern</jsm>(<js>"USER_*"</js>, Pattern.<jsf>CASE_INSENSITIVE</jsf>); 2518 * <jk>boolean</jk> <jv>matches</jv> = <jv>pattern</jv>.matcher(<js>"user_alice"</js>).matches(); <jc>// true</jc> 2519 * </p> 2520 * 2521 * @param s The glob-style wildcard pattern string. 2522 * @param flags Regular expression flags (see {@link java.util.regex.Pattern} constants). 2523 * @return A compiled {@link java.util.regex.Pattern} object, or <jk>null</jk> if the input string is <jk>null</jk>. 2524 */ 2525 public static java.util.regex.Pattern getGlobMatchPattern(String s, int flags) { 2526 if (s == null) 2527 return null; 2528 var sb = new StringBuilder(); 2529 sb.append("\\Q"); 2530 for (var i = 0; i < s.length(); i++) { 2531 var c = s.charAt(i); 2532 if (c == '*') 2533 sb.append("\\E").append(".*").append("\\Q"); 2534 else if (c == '?') 2535 sb.append("\\E").append(".").append("\\Q"); 2536 else 2537 sb.append(c); 2538 } 2539 sb.append("\\E"); 2540 return java.util.regex.Pattern.compile(sb.toString(), flags); 2541 } 2542 2543 /** 2544 * Converts a string containing <js>"*"</js> meta characters with a regular expression pattern. 2545 * 2546 * @param s The string to create a pattern from. 2547 * @return A regular expression pattern. 2548 */ 2549 public static Pattern getMatchPattern(String s) { 2550 return getMatchPattern(s, 0); 2551 } 2552 2553 /** 2554 * Converts a string containing <js>"*"</js> meta characters with a regular expression pattern. 
2555 * 2556 * @param s The string to create a pattern from. 2557 * @param flags Regular expression flags. 2558 * @return A regular expression pattern. 2559 */ 2560 public static Pattern getMatchPattern(String s, int flags) { 2561 if (s == null) 2562 return null; 2563 var sb = new StringBuilder(); 2564 sb.append("\\Q"); 2565 for (var i = 0; i < s.length(); i++) { 2566 var c = s.charAt(i); 2567 if (c == '*') 2568 sb.append("\\E").append(".*").append("\\Q"); 2569 else if (c == '?') 2570 sb.append("\\E").append(".").append("\\Q"); 2571 else 2572 sb.append(c); 2573 } 2574 sb.append("\\E"); 2575 return Pattern.compile(sb.toString(), flags); 2576 } 2577 2578 /** 2579 * Takes in a string, splits it by lines, and then prepends each line with line numbers. 2580 * 2581 * @param s The string. 2582 * @return The string with line numbers added. 2583 */ 2584 public static String getNumberedLines(String s) { 2585 return getNumberedLines(s, 1, Integer.MAX_VALUE); 2586 } 2587 2588 /** 2589 * Same as {@link #getNumberedLines(String)} except only returns the specified lines. 2590 * 2591 * <p> 2592 * Out-of-bounds values are allowed and fixed. 2593 * 2594 * @param s The string. 2595 * @param start The starting line (1-indexed). 2596 * @param end The ending line (1-indexed). 2597 * @return The string with line numbers added. 2598 */ 2599 public static String getNumberedLines(String s, int start, int end) { 2600 if (s == null) 2601 return null; 2602 var lines = s.split("[\r\n]+"); 2603 var digits = String.valueOf(lines.length).length(); 2604 if (start < 1) 2605 start = 1; 2606 if (end < 0) 2607 end = Integer.MAX_VALUE; 2608 if (end > lines.length) 2609 end = lines.length; 2610 var sb = new StringBuilder(); 2611 for (var l : l(lines).subList(start - 1, end)) 2612 sb.append(String.format("%0" + digits + "d", start++)).append(": ").append(l).append("\n"); // NOSONAR - Intentional. 
2613 return sb.toString(); 2614 } 2615 2616 /** 2617 * Calculates the approximate memory size of a string in bytes. 2618 * 2619 * <p> 2620 * Returns <c>0</c> if the input string is <jk>null</jk>. 2621 * This method provides an estimate based on typical JVM object layout: 2622 * <ul> 2623 * <li>String object overhead: ~24 bytes (object header + fields)</li> 2624 * <li>char[] array overhead: ~16 bytes (array header)</li> 2625 * <li>Character data: 2 bytes per character</li> 2626 * </ul> 2627 * 2628 * <p> 2629 * <b>Note:</b> Actual memory usage may vary based on JVM implementation, object alignment, 2630 * and whether compressed OOPs are enabled. This is an approximation for informational purposes. 2631 * 2632 * <h5 class='section'>Examples:</h5> 2633 * <p class='bjava'> 2634 * getStringSize(<jk>null</jk>); <jc>// Returns: 0</jc> 2635 * getStringSize(<js>""</js>); <jc>// Returns: ~40 bytes</jc> 2636 * getStringSize(<js>"hello"</js>); <jc>// Returns: ~50 bytes (40 + 10)</jc> 2637 * getStringSize(<js>"test"</js>); <jc>// Returns: ~48 bytes (40 + 8)</jc> 2638 * </p> 2639 * 2640 * @param str The string to measure. Can be <jk>null</jk>. 2641 * @return The approximate memory size in bytes, or <c>0</c> if the input was <jk>null</jk>. 2642 */ 2643 public static long getStringSize(String str) { 2644 if (str == null) 2645 return 0; 2646 // String object overhead: ~24 bytes (object header + fields: value, hash, coder) 2647 // char[] array overhead: ~16 bytes (array header) 2648 // Character data: 2 bytes per character 2649 return 24L + 16L + (2L * str.length()); 2650 } 2651 2652 /** 2653 * Checks if a string has text (not null, not empty, and contains at least one non-whitespace character). 
2654 * 2655 * <h5 class='section'>Example:</h5> 2656 * <p class='bjava'> 2657 * hasText(<jk>null</jk>); <jc>// false</jc> 2658 * hasText(<js>""</js>); <jc>// false</jc> 2659 * hasText(<js>" "</js>); <jc>// false</jc> 2660 * hasText(<js>"hello"</js>); <jc>// true</jc> 2661 * </p> 2662 * 2663 * @param str The string to check. 2664 * @return <jk>true</jk> if the string is not null, not empty, and contains at least one non-whitespace character. 2665 */ 2666 public static boolean hasText(String str) { 2667 return isNotBlank(str); 2668 } 2669 2670 /** 2671 * Same as {@link String#indexOf(int)} except allows you to check for multiple characters. 2672 * 2673 * @param s The string to check. 2674 * @param c The characters to check for. 2675 * @return The index into the string that is one of the specified characters. 2676 */ 2677 public static int indexOf(String s, char...c) { 2678 if (s == null) 2679 return -1; 2680 for (var i = 0; i < s.length(); i++) { 2681 var c2 = s.charAt(i); 2682 for (var cc : c) 2683 if (c2 == cc) 2684 return i; 2685 } 2686 return -1; 2687 } 2688 2689 /** 2690 * Finds the index of the first occurrence of a substring within a string. 2691 * 2692 * <h5 class='section'>Example:</h5> 2693 * <p class='bjava'> 2694 * indexOf(<js>"hello world"</js>, <js>"world"</js>); <jc>// 6</jc> 2695 * indexOf(<js>"hello world"</js>, <js>"xyz"</js>); <jc>// -1</jc> 2696 * indexOf(<jk>null</jk>, <js>"test"</js>); <jc>// -1</jc> 2697 * </p> 2698 * 2699 * @param str The string to search in. 2700 * @param search The substring to search for. 2701 * @return The index of the first occurrence, or <c>-1</c> if not found or if either parameter is <jk>null</jk>. 2702 */ 2703 public static int indexOf(String str, String search) { 2704 if (str == null || search == null) 2705 return -1; 2706 return str.indexOf(search); 2707 } 2708 2709 /** 2710 * Finds the index of the first occurrence of a substring within a string, ignoring case. 
2711 * 2712 * <h5 class='section'>Example:</h5> 2713 * <p class='bjava'> 2714 * indexOfIgnoreCase(<js>"Hello World"</js>, <js>"world"</js>); <jc>// 6</jc> 2715 * indexOfIgnoreCase(<js>"Hello World"</js>, <js>"WORLD"</js>); <jc>// 6</jc> 2716 * indexOfIgnoreCase(<js>"hello world"</js>, <js>"xyz"</js>); <jc>// -1</jc> 2717 * </p> 2718 * 2719 * @param str The string to search in. 2720 * @param search The substring to search for. 2721 * @return The index of the first occurrence, or <c>-1</c> if not found or if either parameter is <jk>null</jk>. 2722 */ 2723 public static int indexOfIgnoreCase(String str, String search) { 2724 if (str == null || search == null) 2725 return -1; 2726 return str.toLowerCase().indexOf(search.toLowerCase()); 2727 } 2728 2729 /** 2730 * Interns a string, returning the canonical representation. 2731 * 2732 * <p> 2733 * Returns <jk>null</jk> if the input string is <jk>null</jk>. 2734 * This method provides a null-safe wrapper around {@link String#intern()}. 2735 * 2736 * <h5 class='section'>Examples:</h5> 2737 * <p class='bjava'> 2738 * String <jv>s1</jv> = <jk>new</jk> String(<js>"test"</js>); 2739 * String <jv>s2</jv> = <jk>new</jk> String(<js>"test"</js>); 2740 * assertTrue(<jv>s1</jv> != <jv>s2</jv>); <jc>// Different objects</jc> 2741 * 2742 * String <jv>i1</jv> = intern(<jv>s1</jv>); 2743 * String <jv>i2</jv> = intern(<jv>s2</jv>); 2744 * assertTrue(<jv>i1</jv> == <jv>i2</jv>); <jc>// Same interned object</jc> 2745 * </p> 2746 * 2747 * <h5 class='section'>Performance Note:</h5> 2748 * <p>String interning stores strings in a special pool, which can save memory when the same string 2749 * values are used repeatedly. However, the intern pool has limited size and interning can be slow, 2750 * so use judiciously for strings that are known to be repeated frequently.</p> 2751 * 2752 * @param str The string to intern. Can be <jk>null</jk>. 2753 * @return The interned string, or <jk>null</jk> if the input was <jk>null</jk>. 
2754 */ 2755 public static String intern(String str) { 2756 return str == null ? null : str.intern(); 2757 } 2758 2759 /** 2760 * Interpolates variables in a template string using <js>"${name}"</js> syntax. 2761 * 2762 * <p> 2763 * Replaces variables of the form <js>"${name}"</js> with values from the map. 2764 * This is similar to shell variable interpolation syntax. 2765 * 2766 * <h5 class='section'>Example:</h5> 2767 * <p class='bjava'> 2768 * var vars = Map.of(<js>"name"</js>, <js>"John"</js>, <js>"city"</js>, <js>"New York"</js>); 2769 * interpolate(<js>"Hello ${name}, welcome to ${city}"</js>, vars); 2770 * <jc>// Returns: "Hello John, welcome to New York"</jc> 2771 * </p> 2772 * 2773 * @param template The template string with <js>"${name}"</js> variables. 2774 * @param variables The map containing the variable values. 2775 * @return The interpolated string with variables replaced, or the original template if variables is null or empty. 2776 */ 2777 public static String interpolate(String template, Map<String,Object> variables) { 2778 if (template == null) 2779 return null; 2780 if (variables == null || variables.isEmpty()) 2781 return template; 2782 2783 var result = new StringBuilder(); 2784 var i = 0; 2785 var length = template.length(); 2786 2787 while (i < length) { 2788 var dollarIndex = template.indexOf("${", i); 2789 if (dollarIndex == -1) { 2790 // No more variables, append the rest 2791 result.append(template.substring(i)); 2792 break; 2793 } 2794 2795 // Append text before the variable 2796 result.append(template.substring(i, dollarIndex)); 2797 2798 // Find the closing brace 2799 var braceIndex = template.indexOf('}', dollarIndex + 2); 2800 if (braceIndex == -1) { 2801 // No closing brace, append the rest as-is 2802 result.append(template.substring(dollarIndex)); 2803 break; 2804 } 2805 2806 // Extract variable name 2807 var varName = template.substring(dollarIndex + 2, braceIndex); 2808 var value = variables.get(varName); 2809 2810 if 
(variables.containsKey(varName)) { 2811 // Variable exists in map (even if null) 2812 result.append(value != null ? value.toString() : "null"); 2813 } else { 2814 // Variable not found, keep the original placeholder 2815 result.append("${").append(varName).append("}"); 2816 } 2817 2818 i = braceIndex + 1; 2819 } 2820 2821 return result.toString(); 2822 } 2823 2824 /** 2825 * Efficiently determines whether a URL is of the pattern "xxx://xxx" 2826 * 2827 * @param s The string to test. 2828 * @return <jk>true</jk> if it's an absolute path. 2829 */ 2830 public static boolean isAbsoluteUri(String s) { // NOSONAR - False positive. 2831 2832 if (isEmpty(s)) 2833 return false; 2834 2835 // Use a state machine for maximum performance. 2836 2837 // S1: Looking for http 2838 // S2: Found http, looking for : 2839 // S3: Found :, looking for / 2840 // S4: Found /, looking for / 2841 // S5: Found /, looking for x 2842 2843 var state = S1; 2844 2845 for (var i = 0; i < s.length(); i++) { 2846 var c = s.charAt(i); 2847 if (state == S1) { 2848 if (isLowerCaseLetter(c)) 2849 state = S2; 2850 else 2851 return false; 2852 } else if (state == S2) { 2853 if (c == ':') 2854 state = S3; 2855 else if (! isLowerCaseLetter(c)) 2856 return false; 2857 } else if (state == S3) { // NOSONAR - False positive. 2858 if (c == '/') 2859 state = S4; 2860 else 2861 return false; 2862 } else if (state == S4) { 2863 if (c == '/') 2864 state = S5; 2865 else 2866 return false; 2867 } else /* state == S5 */ { 2868 return true; 2869 } 2870 } 2871 return false; 2872 } 2873 2874 /** 2875 * Checks if all of the provided strings are not blank (not null, not empty, and not whitespace only). 2876 * 2877 * <p> 2878 * Returns <jk>true</jk> only if all strings are not null, not empty, and contain non-whitespace characters. 2879 * Returns <jk>false</jk> if the array is null or empty, or if any string is null, empty, or whitespace only. 
2880 * 2881 * <h5 class='section'>Example:</h5> 2882 * <p class='bjava'> 2883 * isAllNotBlank(); <jc>// false</jc> 2884 * isAllNotBlank(<jk>null</jk>); <jc>// false</jc> 2885 * isAllNotBlank(<jk>null</jk>, <jk>null</jk>); <jc>// false</jc> 2886 * isAllNotBlank(<js>""</js>, <js>""</js>); <jc>// false</jc> 2887 * isAllNotBlank(<js>" "</js>, <js>" "</js>); <jc>// false</jc> 2888 * isAllNotBlank(<jk>null</jk>, <js>"hello"</js>); <jc>// false</jc> 2889 * isAllNotBlank(<js>""</js>, <js>" "</js>); <jc>// false</jc> 2890 * isAllNotBlank(<js>"hello"</js>, <js>" "</js>); <jc>// false</jc> 2891 * isAllNotBlank(<js>"hello"</js>); <jc>// true</jc> 2892 * isAllNotBlank(<js>"hello"</js>, <js>"world"</js>); <jc>// true</jc> 2893 * </p> 2894 * 2895 * @param values The strings to check. 2896 * @return <jk>true</jk> if all strings are not null, not empty, and not whitespace only, <jk>false</jk> otherwise. 2897 */ 2898 public static boolean isAllNotBlank(CharSequence...values) { 2899 if (values == null || values.length == 0) 2900 return false; 2901 for (CharSequence value : values) 2902 if (! isNotBlank(value)) 2903 return false; 2904 return true; 2905 } 2906 2907 /** 2908 * Checks if all of the provided strings are not empty (not null and not zero-length). 2909 * 2910 * <p> 2911 * Returns <jk>true</jk> only if all strings are not null and have a length greater than zero. 2912 * Returns <jk>false</jk> if the array is null or empty, or if any string is null or empty. 
2913 * 2914 * <h5 class='section'>Example:</h5> 2915 * <p class='bjava'> 2916 * isAllNotEmpty(); <jc>// false</jc> 2917 * isAllNotEmpty(<jk>null</jk>); <jc>// false</jc> 2918 * isAllNotEmpty(<jk>null</jk>, <jk>null</jk>); <jc>// false</jc> 2919 * isAllNotEmpty(<js>""</js>, <js>""</js>); <jc>// false</jc> 2920 * isAllNotEmpty(<jk>null</jk>, <js>"hello"</js>); <jc>// false</jc> 2921 * isAllNotEmpty(<js>""</js>, <js>" "</js>); <jc>// false</jc> 2922 * isAllNotEmpty(<js>"hello"</js>); <jc>// true</jc> 2923 * isAllNotEmpty(<js>"hello"</js>, <js>"world"</js>); <jc>// true</jc> 2924 * isAllNotEmpty(<js>"hello"</js>, <js>" "</js>); <jc>// true</jc> 2925 * </p> 2926 * 2927 * @param values The strings to check. 2928 * @return <jk>true</jk> if all strings are not null and not empty, <jk>false</jk> otherwise. 2929 */ 2930 public static boolean isAllNotEmpty(CharSequence...values) { 2931 if (values == null || values.length == 0) 2932 return false; 2933 for (CharSequence value : values) 2934 if (value == null || value.isEmpty()) 2935 return false; 2936 return true; 2937 } 2938 2939 /** 2940 * Checks if a string contains only alphabetic characters (a-z, A-Z). 2941 * 2942 * <h5 class='section'>Example:</h5> 2943 * <p class='bjava'> 2944 * isAlpha(<jk>null</jk>); <jc>// false</jc> 2945 * isAlpha(<js>""</js>); <jc>// false</jc> 2946 * isAlpha(<js>"abc"</js>); <jc>// true</jc> 2947 * isAlpha(<js>"abc123"</js>); <jc>// false</jc> 2948 * isAlpha(<js>"abc def"</js>); <jc>// false</jc> 2949 * </p> 2950 * 2951 * @param str The string to check. 2952 * @return <jk>true</jk> if the string is not null, not empty, and contains only alphabetic characters. 2953 */ 2954 public static boolean isAlpha(String str) { 2955 if (isEmpty(str)) 2956 return false; 2957 for (var i = 0; i < str.length(); i++) { 2958 if (! LETTER.contains(str.charAt(i))) 2959 return false; 2960 } 2961 return true; 2962 } 2963 2964 /** 2965 * Checks if a string contains only alphanumeric characters (a-z, A-Z, 0-9). 
2966 * 2967 * <h5 class='section'>Example:</h5> 2968 * <p class='bjava'> 2969 * isAlphaNumeric(<jk>null</jk>); <jc>// false</jc> 2970 * isAlphaNumeric(<js>""</js>); <jc>// false</jc> 2971 * isAlphaNumeric(<js>"abc"</js>); <jc>// true</jc> 2972 * isAlphaNumeric(<js>"abc123"</js>); <jc>// true</jc> 2973 * isAlphaNumeric(<js>"abc def"</js>); <jc>// false</jc> 2974 * isAlphaNumeric(<js>"abc-123"</js>); <jc>// false</jc> 2975 * </p> 2976 * 2977 * @param str The string to check. 2978 * @return <jk>true</jk> if the string is not null, not empty, and contains only alphanumeric characters. 2979 */ 2980 public static boolean isAlphaNumeric(String str) { 2981 if (isEmpty(str)) 2982 return false; 2983 for (var i = 0; i < str.length(); i++) { 2984 if (! (LETTER.contains(str.charAt(i)) || DIGIT.contains(str.charAt(i)))) 2985 return false; 2986 } 2987 return true; 2988 } 2989 2990 /** 2991 * Checks if any of the provided strings are not blank (not null, not empty, and not whitespace only). 2992 * 2993 * <p> 2994 * Returns <jk>true</jk> if at least one string is not null, not empty, and contains non-whitespace characters. 2995 * 2996 * <h5 class='section'>Example:</h5> 2997 * <p class='bjava'> 2998 * isAnyNotBlank(<jk>null</jk>, <jk>null</jk>); <jc>// false</jc> 2999 * isAnyNotBlank(<js>""</js>, <js>""</js>); <jc>// false</jc> 3000 * isAnyNotBlank(<js>" "</js>, <js>" "</js>); <jc>// false</jc> 3001 * isAnyNotBlank(<jk>null</jk>, <js>"hello"</js>); <jc>// true</jc> 3002 * isAnyNotBlank(<js>""</js>, <js>" "</js>, <js>"x"</js>);<jc>// true</jc> 3003 * isAnyNotBlank(<js>"hello"</js>, <js>"world"</js>); <jc>// true</jc> 3004 * </p> 3005 * 3006 * @param values The strings to check. 3007 * @return <jk>true</jk> if at least one string is not null, not empty, and contains non-whitespace characters. 
3008 */ 3009 public static boolean isAnyNotBlank(CharSequence...values) { 3010 if (values == null) 3011 return false; 3012 for (CharSequence value : values) 3013 if (isNotBlank(value)) 3014 return true; 3015 return false; 3016 } 3017 3018 /** 3019 * Checks if any of the provided strings are not empty (not null and not zero-length). 3020 * 3021 * <p> 3022 * Returns <jk>true</jk> if at least one string is not null and has a length greater than zero. 3023 * 3024 * <h5 class='section'>Example:</h5> 3025 * <p class='bjava'> 3026 * isAnyNotEmpty(<jk>null</jk>, <jk>null</jk>); <jc>// false</jc> 3027 * isAnyNotEmpty(<js>""</js>, <js>""</js>); <jc>// false</jc> 3028 * isAnyNotEmpty(<jk>null</jk>, <js>"hello"</js>); <jc>// true</jc> 3029 * isAnyNotEmpty(<js>""</js>, <js>" "</js>); <jc>// true</jc> 3030 * isAnyNotEmpty(<js>"hello"</js>, <js>"world"</js>); <jc>// true</jc> 3031 * </p> 3032 * 3033 * @param values The strings to check. 3034 * @return <jk>true</jk> if at least one string is not null and not empty. 3035 */ 3036 public static boolean isAnyNotEmpty(CharSequence...values) { 3037 if (values == null) 3038 return false; 3039 for (CharSequence value : values) 3040 if (value != null && ! value.isEmpty()) 3041 return true; 3042 return false; 3043 } 3044 3045 /** 3046 * Checks if a string is blank (null, empty, or whitespace only). 3047 * 3048 * <h5 class='section'>Example:</h5> 3049 * <p class='bjava'> 3050 * isBlank(<jk>null</jk>); <jc>// true</jc> 3051 * isBlank(<js>""</js>); <jc>// true</jc> 3052 * isBlank(<js>" "</js>); <jc>// true</jc> 3053 * isBlank(<js>"hello"</js>); <jc>// false</jc> 3054 * </p> 3055 * 3056 * @param str The string to check. 3057 * @return <jk>true</jk> if the string is null, empty, or contains only whitespace characters. 3058 */ 3059 public static boolean isBlank(CharSequence str) { 3060 return str == null || str.toString().isBlank(); 3061 } 3062 3063 /** 3064 * Checks if a string is a valid credit card number using the Luhn algorithm. 
3065 * 3066 * <p> 3067 * Validates credit card numbers by: 3068 * <ul> 3069 * <li>Removing spaces and hyphens</li> 3070 * <li>Checking that all remaining characters are digits</li> 3071 * <li>Verifying the number passes the Luhn algorithm check</li> 3072 * <li>Ensuring the number is between 13-19 digits (standard credit card length)</li> 3073 * </ul> 3074 * 3075 * <h5 class='section'>Example:</h5> 3076 * <p class='bjava'> 3077 * isCreditCard(<jk>null</jk>); <jc>// false</jc> 3078 * isCreditCard(<js>""</js>); <jc>// false</jc> 3079 * isCreditCard(<js>"4532015112830366"</js>); <jc>// true (Visa test card)</jc> 3080 * isCreditCard(<js>"4532-0151-1283-0366"</js>); <jc>// true (with separators)</jc> 3081 * isCreditCard(<js>"1234567890"</js>); <jc>// false (invalid Luhn)</jc> 3082 * </p> 3083 * 3084 * @param str The string to check. 3085 * @return <jk>true</jk> if the string is a valid credit card number. 3086 */ 3087 public static boolean isCreditCard(String str) { 3088 if (isEmpty(str)) 3089 return false; 3090 // Remove spaces and hyphens 3091 var cleaned = str.replaceAll("[\\s\\-]", ""); 3092 // Must be all digits and 13-19 digits long 3093 if (! cleaned.matches("^\\d{13,19}$")) 3094 return false; 3095 // Apply Luhn algorithm 3096 var sum = 0; 3097 var alternate = false; 3098 for (var i = cleaned.length() - 1; i >= 0; i--) { 3099 var digit = Character.getNumericValue(cleaned.charAt(i)); 3100 if (alternate) { 3101 digit *= 2; 3102 if (digit > 9) 3103 digit = (digit % 10) + 1; 3104 } 3105 sum += digit; 3106 alternate = ! alternate; 3107 } 3108 return (sum % 10) == 0; 3109 } 3110 3111 /** 3112 * Returns <jk>true</jk> if the specified string is numeric. 3113 * 3114 * @param s The string to check. 3115 * @return <jk>true</jk> if the specified string is numeric. 3116 */ 3117 public static boolean isDecimal(String s) { 3118 if (s == null || s.isEmpty() || ! 
FIRST_NUMBER_CHARS.contains(s.charAt(0))) 3119 return false; 3120 var i = 0; 3121 var length = s.length(); 3122 var c = s.charAt(0); 3123 var isPrefixed = false; 3124 if (c == '+' || c == '-') { 3125 isPrefixed = true; 3126 i++; 3127 } 3128 if (i == length) 3129 return false; 3130 c = s.charAt(i++); 3131 if (c == '0' && length > (isPrefixed ? 2 : 1)) { 3132 c = s.charAt(i++); 3133 if (c == 'x' || c == 'X') { 3134 for (var j = i; j < length; j++) { 3135 if (! HEXADECIMAL_CHARS.contains(s.charAt(j))) 3136 return false; 3137 } 3138 } else if (OCTAL_CHARS.contains(c)) { 3139 for (var j = i; j < length; j++) 3140 if (! OCTAL_CHARS.contains(s.charAt(j))) 3141 return false; 3142 } else { 3143 return false; 3144 } 3145 } else if (c == '#') { 3146 for (var j = i; j < length; j++) { 3147 if (! HEXADECIMAL_CHARS.contains(s.charAt(j))) 3148 return false; 3149 } 3150 } else if (DECIMAL_CHARS.contains(c)) { 3151 for (var j = i; j < length; j++) 3152 if (! DECIMAL_CHARS.contains(s.charAt(j))) 3153 return false; 3154 } else { 3155 return false; 3156 } 3157 return true; 3158 } 3159 3160 /** 3161 * Checks if a string contains only digit characters (0-9). 3162 * 3163 * <h5 class='section'>Example:</h5> 3164 * <p class='bjava'> 3165 * isDigit(<jk>null</jk>); <jc>// false</jc> 3166 * isDigit(<js>""</js>); <jc>// false</jc> 3167 * isDigit(<js>"123"</js>); <jc>// true</jc> 3168 * isDigit(<js>"abc123"</js>); <jc>// false</jc> 3169 * isDigit(<js>"12.3"</js>); <jc>// false</jc> 3170 * </p> 3171 * 3172 * @param str The string to check. 3173 * @return <jk>true</jk> if the string is not null, not empty, and contains only digit characters. 3174 */ 3175 public static boolean isDigit(String str) { 3176 if (isEmpty(str)) 3177 return false; 3178 for (var i = 0; i < str.length(); i++) { 3179 if (! DIGIT.contains(str.charAt(i))) 3180 return false; 3181 } 3182 return true; 3183 } 3184 3185 /** 3186 * Checks if a string is a valid email address. 
3187 * 3188 * <p> 3189 * Performs basic email validation using a simple regex pattern. 3190 * This is not a complete RFC 5321/5322 validation, but covers most common email formats. 3191 * 3192 * <h5 class='section'>Example:</h5> 3193 * <p class='bjava'> 3194 * isEmail(<jk>null</jk>); <jc>// false</jc> 3195 * isEmail(<js>""</js>); <jc>// false</jc> 3196 * isEmail(<js>"user@example.com"</js>); <jc>// true</jc> 3197 * isEmail(<js>"invalid.email"</js>); <jc>// false</jc> 3198 * </p> 3199 * 3200 * @param str The string to check. 3201 * @return <jk>true</jk> if the string is a valid email address. 3202 */ 3203 public static boolean isEmail(String str) { 3204 if (isEmpty(str)) 3205 return false; 3206 // Basic email regex: local@domain 3207 // Allows letters, digits, dots, underscores, hyphens, and plus signs in local part 3208 // Domain must have at least one dot and valid TLD 3209 return str.matches("^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"); 3210 } 3211 3212 /** 3213 * Checks if a string is null or empty. 3214 * 3215 * <h5 class='section'>Example:</h5> 3216 * <p class='bjava'> 3217 * isEmpty(<jk>null</jk>); <jc>// true</jc> 3218 * isEmpty(<js>""</js>); <jc>// true</jc> 3219 * isEmpty(<js>"abc"</js>); <jc>// false</jc> 3220 * </p> 3221 * 3222 * @param str The string to check. 3223 * @return <jk>true</jk> if the string is null or empty. 3224 */ 3225 public static boolean isEmpty(String str) { 3226 return str == null || str.isEmpty(); 3227 } 3228 3229 /** 3230 * Returns <jk>true</jk> if the specified character is a valid first character for a number. 3231 * 3232 * @param c The character to test. 3233 * @return <jk>true</jk> if the specified character is a valid first character for a number. 3234 */ 3235 public static boolean isFirstNumberChar(char c) { 3236 return FIRST_NUMBER_CHARS.contains(c); 3237 } 3238 3239 /** 3240 * Returns <jk>true</jk> if the specified string is a floating point number. 3241 * 3242 * @param s The string to check. 
3243 * @return <jk>true</jk> if the specified string is a floating point number. 3244 */ 3245 public static boolean isFloat(String s) { 3246 if (s == null || s.isEmpty()) 3247 return false; 3248 if (! FIRST_NUMBER_CHARS.contains(s.charAt(0))) 3249 return (s.equals("NaN") || s.equals("Infinity")); 3250 var i = 0; 3251 var length = s.length(); 3252 var c = s.charAt(0); 3253 if (c == '+' || c == '-') 3254 i++; 3255 if (i == length) 3256 return false; 3257 c = s.charAt(i); 3258 if (c == '.' || DECIMAL_CHARS.contains(c)) { 3259 return FP_REGEX.matcher(s).matches(); 3260 } 3261 return false; 3262 } 3263 3264 /** 3265 * Checks if a string is already interned. 3266 * 3267 * <p> 3268 * Returns <jk>false</jk> if the input string is <jk>null</jk>. 3269 * A string is considered interned if it is the same object reference as its interned version. 3270 * 3271 * <h5 class='section'>Examples:</h5> 3272 * <p class='bjava'> 3273 * String <jv>s1</jv> = <js>"test"</js>; <jc>// String literal is automatically interned</jc> 3274 * assertTrue(isInterned(<jv>s1</jv>)); 3275 * 3276 * String <jv>s2</jv> = <jk>new</jk> String(<js>"test"</js>); <jc>// New object, not interned</jc> 3277 * assertFalse(isInterned(<jv>s2</jv>)); 3278 * 3279 * String <jv>s3</jv> = intern(<jv>s2</jv>); <jc>// Now interned</jc> 3280 * assertTrue(isInterned(<jv>s3</jv>)); 3281 * </p> 3282 * 3283 * @param str The string to check. Can be <jk>null</jk>. 3284 * @return <jk>true</jk> if the string is interned, <jk>false</jk> otherwise. 3285 */ 3286 public static boolean isInterned(String str) { 3287 if (str == null) 3288 return false; 3289 return str == str.intern(); 3290 } 3291 3292 /** 3293 * Returns <jk>true</jk> if the specified string appears to be valid JSON. 3294 * 3295 * <p> 3296 * This method performs a simple heuristic check and does not strictly validate JSON syntax. 3297 * Leading and trailing spaces are ignored. 3298 * <br>Leading and trailing comments are not allowed. 
3299 * 3300 * @param s The string to test. 3301 * @return <jk>true</jk> if the specified string appears to be valid JSON. 3302 */ 3303 public static boolean isProbablyJson(String s) { 3304 if (s == null) 3305 return false; 3306 var c1 = firstNonWhitespaceChar(s); 3307 var c2 = lastNonWhitespaceChar(s); 3308 if (c1 == '{' && c2 == '}' || c1 == '[' && c2 == ']' || c1 == '\'' && c2 == '\'') 3309 return true; 3310 return (isOneOf(s, "true", "false", "null") || isNumeric(s)); 3311 } 3312 3313 /** 3314 * Returns <jk>true</jk> if the specified string appears to be a JSON array. 3315 * 3316 * <p> 3317 * This method performs a simple heuristic check and does not strictly validate JSON syntax. 3318 * 3319 * @param o The object to test. 3320 * @param ignoreWhitespaceAndComments If <jk>true</jk>, leading and trailing whitespace and comments will be ignored. 3321 * @return <jk>true</jk> if the specified string appears to be a JSON array. 3322 */ 3323 public static boolean isProbablyJsonArray(Object o, boolean ignoreWhitespaceAndComments) { 3324 if (o instanceof CharSequence o2) { 3325 var s = o2.toString(); 3326 if (! ignoreWhitespaceAndComments) 3327 return (s.startsWith("[") && s.endsWith("]")); 3328 if (firstRealCharacter(s) != '[') 3329 return false; 3330 var i = s.lastIndexOf(']'); 3331 if (i == -1) 3332 return false; 3333 s = s.substring(i + 1); 3334 return firstRealCharacter(s) == -1; 3335 } 3336 return false; 3337 } 3338 3339 /** 3340 * Returns <jk>true</jk> if the specified string appears to be a JSON object. 3341 * 3342 * <p> 3343 * This method performs a simple heuristic check and does not strictly validate JSON syntax. 3344 * 3345 * @param o The object to test. 3346 * @param ignoreWhitespaceAndComments If <jk>true</jk>, leading and trailing whitespace and comments will be ignored. 3347 * @return <jk>true</jk> if the specified string appears to be a JSON object. 
3348 */ 3349 public static boolean isProbablyJsonObject(Object o, boolean ignoreWhitespaceAndComments) { 3350 if (o instanceof CharSequence o2) { 3351 var s = o2.toString(); 3352 if (! ignoreWhitespaceAndComments) 3353 return (s.startsWith("{") && s.endsWith("}")); 3354 if (firstRealCharacter(s) != '{') 3355 return false; 3356 var i = s.lastIndexOf('}'); 3357 if (i == -1) 3358 return false; 3359 s = s.substring(i + 1); 3360 return firstRealCharacter(s) == -1; 3361 } 3362 return false; 3363 } 3364 3365 /** 3366 * Checks if a string is not blank (not null, not empty, and not whitespace only). 3367 * 3368 * <h5 class='section'>Example:</h5> 3369 * <p class='bjava'> 3370 * isNotBlank(<jk>null</jk>); <jc>// false</jc> 3371 * isNotBlank(<js>""</js>); <jc>// false</jc> 3372 * isNotBlank(<js>" "</js>); <jc>// false</jc> 3373 * isNotBlank(<js>"hello"</js>); <jc>// true</jc> 3374 * </p> 3375 * 3376 * @param str The string to check. 3377 * @return <jk>true</jk> if the string is not null, not empty, and contains non-whitespace characters. 3378 */ 3379 public static boolean isNotBlank(CharSequence str) { 3380 return ! isBlank(str); 3381 } 3382 3383 /** 3384 * Returns <jk>true</jk> if the specified character is a valid number character. 3385 * 3386 * @param c The character to check. 3387 * @return <jk>true</jk> if the specified character is a valid number character. 3388 */ 3389 public static boolean isNumberChar(char c) { 3390 return NUMBER_CHARS.contains(c); 3391 } 3392 3393 /** 3394 * Returns <jk>true</jk> if this string can be parsed by {@link #parseNumber(String, Class)}. 3395 * 3396 * @param s The string to check. 3397 * @return <jk>true</jk> if this string can be parsed without causing an exception. 3398 */ 3399 public static boolean isNumeric(String s) { 3400 if (s == null || s.isEmpty() || ! 
isFirstNumberChar(s.charAt(0))) 3401 return false; 3402 return isDecimal(s) || isFloat(s); 3403 } 3404 3405 /** 3406 * Returns <jk>true</jk> if the specified string is one of the specified values. 3407 * 3408 * @param s 3409 * The string to test. 3410 * Can be <jk>null</jk>. 3411 * @param values 3412 * The values to test. 3413 * Can contain <jk>null</jk>. 3414 * @return <jk>true</jk> if the specified string is one of the specified values. 3415 */ 3416 public static boolean isOneOf(String s, String...values) { 3417 assertArgNotNull("values", values); 3418 for (var value : values) 3419 if (eq(s, value)) 3420 return true; 3421 return false; 3422 } 3423 3424 /** 3425 * Checks if a string is a valid phone number. 3426 * 3427 * <p> 3428 * Performs basic phone number validation. 3429 * Accepts various formats including: 3430 * <ul> 3431 * <li>Digits only: <js>"1234567890"</js></li> 3432 * <li>With separators: <js>"(123) 456-7890"</js>, <js>"123-456-7890"</js>, <js>"123.456.7890"</js></li> 3433 * <li>With country code: <js>"+1 123-456-7890"</js></li> 3434 * </ul> 3435 * 3436 * <h5 class='section'>Example:</h5> 3437 * <p class='bjava'> 3438 * isPhoneNumber(<jk>null</jk>); <jc>// false</jc> 3439 * isPhoneNumber(<js>""</js>); <jc>// false</jc> 3440 * isPhoneNumber(<js>"1234567890"</js>); <jc>// true</jc> 3441 * isPhoneNumber(<js>"(123) 456-7890"</js>); <jc>// true</jc> 3442 * isPhoneNumber(<js>"123"</js>); <jc>// false</jc> 3443 * </p> 3444 * 3445 * @param str The string to check. 3446 * @return <jk>true</jk> if the string is a valid phone number. 
3447 */ 3448 public static boolean isPhoneNumber(String str) { 3449 if (isEmpty(str)) 3450 return false; 3451 // Remove common phone number separators and check if remaining is 10-15 digits 3452 // Allows: digits, spaces, parentheses, hyphens, dots, plus sign (for country code) 3453 var cleaned = str.replaceAll("[\\s()\\-\\.]", ""); 3454 if (cleaned.startsWith("+")) 3455 cleaned = cleaned.substring(1); 3456 // Phone numbers should have 10-15 digits (10 for US, up to 15 for international) 3457 return cleaned.matches("^\\d{10,15}$"); 3458 } 3459 3460 /** 3461 * Checks if two strings are similar based on a similarity threshold. 3462 * 3463 * <p> 3464 * Uses the {@link #similarity(String, String)} method to calculate similarity and compares it to the threshold. 3465 * 3466 * <h5 class='section'>Example:</h5> 3467 * <p class='bjava'> 3468 * isSimilar(<js>"hello"</js>, <js>"hello"</js>, <js>0.8</js>); <jc>// true</jc> 3469 * isSimilar(<js>"kitten"</js>, <js>"sitting"</js>, <js>0.8</js>); <jc>// false</jc> 3470 * isSimilar(<js>"kitten"</js>, <js>"sitting"</js>, <js>0.5</js>); <jc>// true</jc> 3471 * </p> 3472 * 3473 * @param str1 The first string. 3474 * @param str2 The second string. 3475 * @param threshold The similarity threshold (0.0 to 1.0). 3476 * @return <jk>true</jk> if the similarity is greater than or equal to the threshold, <jk>false</jk> otherwise. 3477 */ 3478 public static boolean isSimilar(String str1, String str2, double threshold) { 3479 return similarity(str1, str2) >= threshold; 3480 } 3481 3482 /** 3483 * Efficiently determines whether a URL is of the pattern "xxx:/xxx". 3484 * 3485 * <p> 3486 * The pattern matched is: <c>[a-z]{2,}\:\/.*</c> 3487 * 3488 * <p> 3489 * Note that this excludes filesystem paths such as <js>"C:/temp"</js>. 3490 * 3491 * @param s The string to test. 3492 * @return <jk>true</jk> if it's an absolute path. 3493 */ 3494 public static boolean isUri(String s) { // NOSONAR - False positive. 
3495 3496 if (isEmpty(s)) 3497 return false; 3498 3499 // Use a state machine for maximum performance. 3500 3501 // S1: Looking for protocol char 1 3502 // S2: Found protocol char 1, looking for protocol char 2 3503 // S3: Found protocol char 2, looking for : 3504 // S4: Found :, looking for / 3505 3506 var state = S1; 3507 3508 for (var i = 0; i < s.length(); i++) { 3509 var c = s.charAt(i); 3510 if (state == S1) { 3511 if (isLowerCaseLetter(c)) 3512 state = S2; 3513 else 3514 return false; 3515 } else if (state == S2) { 3516 if (isLowerCaseLetter(c)) 3517 state = S3; 3518 else 3519 return false; 3520 } else if (state == S3) { // NOSONAR - False positive. 3521 if (c == ':') 3522 state = S4; 3523 else if (! isLowerCaseLetter(c)) 3524 return false; 3525 } else /* state == S4 */ { 3526 return c == '/'; 3527 } 3528 } 3529 return false; 3530 } 3531 3532 private static boolean isLowerCaseLetter(char c) { 3533 if (c < 'a') 3534 return false; 3535 if (c > 'z') 3536 return false; 3537 return true; 3538 } 3539 3540 /** 3541 * Validates if a date string matches the specified date format. 3542 * 3543 * <p> 3544 * Uses {@link SimpleDateFormat} to parse the date string according to the format pattern. 3545 * 3546 * <h5 class='section'>Example:</h5> 3547 * <p class='bjava'> 3548 * isValidDateFormat(<js>"2023-12-25"</js>, <js>"yyyy-MM-dd"</js>); <jc>// true</jc> 3549 * isValidDateFormat(<js>"25/12/2023"</js>, <js>"dd/MM/yyyy"</js>); <jc>// true</jc> 3550 * isValidDateFormat(<js>"2023-13-25"</js>, <js>"yyyy-MM-dd"</js>); <jc>// false (invalid month)</jc> 3551 * </p> 3552 * 3553 * @param dateStr The date string to validate. Can be <jk>null</jk>. 3554 * @param format The date format pattern (e.g., "yyyy-MM-dd"). Can be <jk>null</jk>. 3555 * @return <jk>true</jk> if the date string matches the format, <jk>false</jk> otherwise. 
3556 */ 3557 public static boolean isValidDateFormat(String dateStr, String format) { 3558 if (isEmpty(dateStr) || isEmpty(format)) 3559 return false; 3560 try { 3561 var sdf = new SimpleDateFormat(format); 3562 sdf.setLenient(false); // Strict parsing 3563 sdf.parse(dateStr); 3564 return true; 3565 } catch (@SuppressWarnings("unused") ParseException | IllegalArgumentException e) { 3566 // IllegalArgumentException thrown for invalid format patterns 3567 return false; 3568 } 3569 } 3570 3571 /** 3572 * Validates if a string is a valid hostname. 3573 * 3574 * <p> 3575 * Validates hostnames according to RFC 1123. A valid hostname: 3576 * <ul> 3577 * <li>Can contain letters, digits, and hyphens</li> 3578 * <li>Cannot start or end with a hyphen</li> 3579 * <li>Each label (dot-separated part) can be up to 63 characters</li> 3580 * <li>Total length can be up to 253 characters</li> 3581 * <li>Labels cannot be empty</li> 3582 * </ul> 3583 * 3584 * <h5 class='section'>Example:</h5> 3585 * <p class='bjava'> 3586 * isValidHostname(<js>"example.com"</js>); <jc>// true</jc> 3587 * isValidHostname(<js>"sub.example.com"</js>); <jc>// true</jc> 3588 * isValidHostname(<js>"-invalid.com"</js>); <jc>// false (starts with hyphen)</jc> 3589 * isValidHostname(<js>"example..com"</js>); <jc>// false (empty label)</jc> 3590 * </p> 3591 * 3592 * @param hostname The hostname string to validate. Can be <jk>null</jk>. 3593 * @return <jk>true</jk> if the string is a valid hostname, <jk>false</jk> otherwise. 
3594 */ 3595 public static boolean isValidHostname(String hostname) { 3596 if (isEmpty(hostname)) 3597 return false; 3598 3599 // Cannot start or end with a dot 3600 if (hostname.startsWith(".") || hostname.endsWith(".")) 3601 return false; 3602 3603 // Total length cannot exceed 253 characters 3604 if (hostname.length() > 253) 3605 return false; 3606 3607 // Split by dots (use -1 to preserve trailing empty strings) 3608 var labels = hostname.split("\\.", -1); 3609 3610 // Check each label 3611 for (var label : labels) { 3612 // Label cannot be empty 3613 if (label.isEmpty()) 3614 return false; 3615 3616 // Label cannot exceed 63 characters 3617 if (label.length() > 63) 3618 return false; 3619 3620 // Label cannot start or end with hyphen 3621 if (label.startsWith("-") || label.endsWith("-")) 3622 return false; 3623 3624 // Label can only contain letters, digits, and hyphens 3625 if (! label.matches("^[a-zA-Z0-9-]+$")) 3626 return false; 3627 } 3628 3629 return true; 3630 } 3631 3632 /** 3633 * Validates if a string is a valid IP address (IPv4 or IPv6). 3634 * 3635 * <p> 3636 * Supports both IPv4 (e.g., "192.168.1.1") and IPv6 (e.g., "2001:0db8:85a3:0000:0000:8a2e:0370:7334") formats. 3637 * 3638 * <h5 class='section'>Example:</h5> 3639 * <p class='bjava'> 3640 * isValidIpAddress(<js>"192.168.1.1"</js>); <jc>// true</jc> 3641 * isValidIpAddress(<js>"2001:0db8:85a3::8a2e:0370:7334"</js>); <jc>// true</jc> 3642 * isValidIpAddress(<js>"256.1.1.1"</js>); <jc>// false</jc> 3643 * isValidIpAddress(<js>"not.an.ip"</js>); <jc>// false</jc> 3644 * </p> 3645 * 3646 * @param ip The IP address string to validate. Can be <jk>null</jk>. 3647 * @return <jk>true</jk> if the string is a valid IP address, <jk>false</jk> otherwise. 3648 */ 3649 public static boolean isValidIpAddress(String ip) { 3650 if (isEmpty(ip)) 3651 return false; 3652 try { 3653 // Try IPv4 first 3654 if (ip.contains(".") && ! 
ip.contains(":")) { 3655 var parts = ip.split("\\."); 3656 if (parts.length != 4) 3657 return false; 3658 for (var part : parts) { 3659 var num = Integer.parseInt(part); 3660 if (num < 0 || num > 255) 3661 return false; 3662 } 3663 return true; 3664 } 3665 // Try IPv6 - validate format without network operations 3666 if (ip.contains(":")) { 3667 return isValidIPv6Address(ip); 3668 } 3669 return false; 3670 } catch (@SuppressWarnings("unused") NumberFormatException e) { 3671 return false; 3672 } 3673 } 3674 3675 /** 3676 * Validates if a string is a valid MAC address. 3677 * 3678 * <p> 3679 * Supports common MAC address formats: 3680 * <ul> 3681 * <li>Colon-separated: <js>"00:1B:44:11:3A:B7"</js></li> 3682 * <li>Hyphen-separated: <js>"00-1B-44-11-3A-B7"</js></li> 3683 * <li>No separators: <js>"001B44113AB7"</js></li> 3684 * </ul> 3685 * 3686 * <h5 class='section'>Example:</h5> 3687 * <p class='bjava'> 3688 * isValidMacAddress(<js>"00:1B:44:11:3A:B7"</js>); <jc>// true</jc> 3689 * isValidMacAddress(<js>"00-1B-44-11-3A-B7"</js>); <jc>// true</jc> 3690 * isValidMacAddress(<js>"001B44113AB7"</js>); <jc>// true</jc> 3691 * isValidMacAddress(<js>"00:1B:44:11:3A"</js>); <jc>// false (too short)</jc> 3692 * </p> 3693 * 3694 * @param mac The MAC address string to validate. Can be <jk>null</jk>. 3695 * @return <jk>true</jk> if the string is a valid MAC address, <jk>false</jk> otherwise. 3696 */ 3697 public static boolean isValidMacAddress(String mac) { 3698 if (isEmpty(mac)) 3699 return false; 3700 3701 // Remove separators and check if it's 12 hex digits 3702 var cleaned = mac.replaceAll("[:-]", "").toUpperCase(); 3703 if (cleaned.length() != 12) 3704 return false; 3705 3706 // Check if all characters are valid hex digits 3707 return cleaned.matches("^[0-9A-F]{12}$"); 3708 } 3709 3710 /** 3711 * Validates if a string is a valid regular expression pattern. 3712 * 3713 * <p> 3714 * Attempts to compile the regex pattern to verify it's syntactically correct. 
3715 * 3716 * <h5 class='section'>Example:</h5> 3717 * <p class='bjava'> 3718 * isValidRegex(<js>"[a-z]+"</js>); <jc>// true</jc> 3719 * isValidRegex(<js>"[a-z"</js>); <jc>// false (unclosed bracket)</jc> 3720 * isValidRegex(<js>"(test"</js>); <jc>// false (unclosed parenthesis)</jc> 3721 * </p> 3722 * 3723 * @param regex The regex pattern to validate. Can be <jk>null</jk>. 3724 * @return <jk>true</jk> if the string is a valid regex pattern, <jk>false</jk> otherwise. 3725 */ 3726 public static boolean isValidRegex(String regex) { 3727 if (isEmpty(regex)) 3728 return false; 3729 try { 3730 Pattern.compile(regex); 3731 return true; 3732 } catch (@SuppressWarnings("unused") PatternSyntaxException e) { 3733 return false; 3734 } 3735 } 3736 3737 /** 3738 * Validates if a time string matches the specified time format. 3739 * 3740 * <p> 3741 * Uses {@link SimpleDateFormat} to parse the time string according to the format pattern. 3742 * 3743 * <h5 class='section'>Example:</h5> 3744 * <p class='bjava'> 3745 * isValidTimeFormat(<js>"14:30:00"</js>, <js>"HH:mm:ss"</js>); <jc>// true</jc> 3746 * isValidTimeFormat(<js>"2:30 PM"</js>, <js>"h:mm a"</js>); <jc>// true</jc> 3747 * isValidTimeFormat(<js>"25:00:00"</js>, <js>"HH:mm:ss"</js>); <jc>// false (invalid hour)</jc> 3748 * </p> 3749 * 3750 * @param timeStr The time string to validate. Can be <jk>null</jk>. 3751 * @param format The time format pattern (e.g., "HH:mm:ss"). Can be <jk>null</jk>. 3752 * @return <jk>true</jk> if the time string matches the format, <jk>false</jk> otherwise. 
3753 */ 3754 public static boolean isValidTimeFormat(String timeStr, String format) { 3755 if (isEmpty(timeStr) || isEmpty(format)) 3756 return false; 3757 try { 3758 var sdf = new SimpleDateFormat(format); 3759 sdf.setLenient(false); // Strict parsing 3760 sdf.parse(timeStr); 3761 return true; 3762 } catch (@SuppressWarnings("unused") ParseException | IllegalArgumentException e) { 3763 // IllegalArgumentException thrown for invalid format patterns 3764 return false; 3765 } 3766 } 3767 3768 /** 3769 * Checks if a character is whitespace. 3770 * 3771 * @param c The character to check. 3772 * @return <jk>true</jk> if the character is whitespace. 3773 */ 3774 public static boolean isWhitespace(int c) { 3775 return Character.isWhitespace(c); 3776 } 3777 3778 /** 3779 * Checks if a string contains only whitespace characters. 3780 * 3781 * <h5 class='section'>Example:</h5> 3782 * <p class='bjava'> 3783 * isWhitespace(<jk>null</jk>); <jc>// false</jc> 3784 * isWhitespace(<js>""</js>); <jc>// true</jc> 3785 * isWhitespace(<js>" "</js>); <jc>// true</jc> 3786 * isWhitespace(<js>"\t\n"</js>); <jc>// true</jc> 3787 * isWhitespace(<js>" a "</js>); <jc>// false</jc> 3788 * </p> 3789 * 3790 * @param str The string to check. 3791 * @return <jk>true</jk> if the string is not null and contains only whitespace characters (or is empty). 3792 */ 3793 public static boolean isWhitespace(String str) { 3794 if (str == null) 3795 return false; 3796 if (str.isEmpty()) 3797 return true; 3798 for (var i = 0; i < str.length(); i++) { 3799 if (! isWhitespace(str.charAt(i))) 3800 return false; 3801 } 3802 return true; 3803 } 3804 3805 /** 3806 * Combines collection values into a simple comma-delimited string. 3807 * 3808 * @param values The values to join. 3809 * @return A comma-delimited string. 3810 */ 3811 public static String join(Collection<?> values) { 3812 return joine(toList(values), ','); 3813 } 3814 3815 /** 3816 * Join the specified tokens into a delimited string. 
3817 * 3818 * @param tokens The tokens to join. 3819 * @param d The delimiter. 3820 * @return The delimited string. If <c>tokens</c> is <jk>null</jk>, returns <jk>null</jk>. 3821 */ 3822 public static String join(Collection<?> tokens, char d) { 3823 if (tokens == null) 3824 return null; 3825 var sb = new StringBuilder(); 3826 for (var iter = tokens.iterator(); iter.hasNext();) { 3827 sb.append(iter.next()); 3828 if (iter.hasNext()) 3829 sb.append(d); 3830 } 3831 return sb.toString(); 3832 } 3833 3834 /** 3835 * Join the specified tokens into a delimited string. 3836 * 3837 * @param tokens The tokens to join. 3838 * @param d The delimiter. 3839 * @return The delimited string. If <c>tokens</c> is <jk>null</jk>, returns <jk>null</jk>. 3840 */ 3841 public static String join(Collection<?> tokens, String d) { 3842 if (tokens == null) 3843 return null; 3844 return StringUtils.join(tokens, d, new StringBuilder()).toString(); 3845 } 3846 3847 /** 3848 * Joins the specified tokens into a delimited string and writes the output to the specified string builder. 3849 * 3850 * @param tokens The tokens to join. 3851 * @param d The delimiter. 3852 * @param sb The string builder to append the response to. 3853 * @return The same string builder passed in as <c>sb</c>. 3854 */ 3855 public static StringBuilder join(Collection<?> tokens, String d, StringBuilder sb) { 3856 if (tokens == null) 3857 return sb; 3858 for (var iter = tokens.iterator(); iter.hasNext();) { 3859 sb.append(iter.next()); 3860 if (iter.hasNext()) 3861 sb.append(d); 3862 } 3863 return sb; 3864 } 3865 3866 /** 3867 * Join the specified tokens into a delimited string. 3868 * 3869 * @param tokens The tokens to join. 3870 * @param d The delimiter. 3871 * @return The delimited string. If <c>tokens</c> is <jk>null</jk>, returns <jk>null</jk>. 
3872 */ 3873 public static String join(int[] tokens, char d) { 3874 if (tokens == null) 3875 return null; 3876 var sb = new StringBuilder(); 3877 for (var i = 0; i < tokens.length; i++) { 3878 if (i > 0) 3879 sb.append(d); 3880 sb.append(tokens[i]); 3881 } 3882 return sb.toString(); 3883 } 3884 3885 /** 3886 * Joins an array of integers with a delimiter. 3887 * 3888 * <h5 class='section'>Example:</h5> 3889 * <p class='bjava'> 3890 * join(<jk>new int</jk>[]{1, 2, 3}, <js>","</js>); <jc>// "1,2,3"</jc> 3891 * join(<jk>new int</jk>[]{}, <js>","</js>); <jc>// ""</jc> 3892 * </p> 3893 * 3894 * @param array The array to join. 3895 * @param delimiter The delimiter string. 3896 * @return The joined string. 3897 */ 3898 public static String join(int[] array, String delimiter) { 3899 if (array == null || array.length == 0) 3900 return ""; 3901 if (delimiter == null) 3902 delimiter = ""; 3903 return Arrays.stream(array).mapToObj(String::valueOf).collect(Collectors.joining(delimiter)); 3904 } 3905 3906 3907 /** 3908 * Joins the specified tokens into a delimited string. 3909 * 3910 * @param tokens The tokens to join. 3911 * @param d The delimiter. 3912 * @return The delimited string. If <c>tokens</c> is <jk>null</jk>, returns <jk>null</jk>. 3913 */ 3914 public static String join(Object[] tokens, char d) { 3915 if (tokens == null) 3916 return null; 3917 if (tokens.length == 1) 3918 return emptyIfNull(s(tokens[0])); 3919 return StringUtils.join(tokens, d, new StringBuilder()).toString(); 3920 } 3921 3922 /** 3923 * Join the specified tokens into a delimited string and writes the output to the specified string builder. 3924 * 3925 * @param tokens The tokens to join. 3926 * @param d The delimiter. 3927 * @param sb The string builder to append the response to. 3928 * @return The same string builder passed in as <c>sb</c>. 
3929 */ 3930 public static StringBuilder join(Object[] tokens, char d, StringBuilder sb) { 3931 if (tokens == null) 3932 return sb; 3933 for (var i = 0; i < tokens.length; i++) { 3934 if (i > 0) 3935 sb.append(d); 3936 sb.append(tokens[i]); 3937 } 3938 return sb; 3939 } 3940 3941 /** 3942 * Join the specified tokens into a delimited string. 3943 * 3944 * @param tokens The tokens to join. 3945 * @param separator The delimiter. 3946 * @return The delimited string. If <c>tokens</c> is <jk>null</jk>, returns <jk>null</jk>. 3947 */ 3948 public static String join(Object[] tokens, String separator) { 3949 if (tokens == null) 3950 return null; 3951 var sb = new StringBuilder(); 3952 for (var i = 0; i < tokens.length; i++) { 3953 if (i > 0) 3954 sb.append(separator); 3955 sb.append(tokens[i]); 3956 } 3957 return sb.toString(); 3958 } 3959 3960 /** 3961 * Combines values into a simple comma-delimited string. 3962 * 3963 * @param values The values to join. 3964 * @return A comma-delimited string. 3965 */ 3966 public static String join(String...values) { 3967 return join(values, ','); 3968 } 3969 3970 /** 3971 * Same as {@link StringUtils#join(Collection, char)} but escapes the delimiter if found in the tokens. 3972 * 3973 * @param tokens The tokens to join. 3974 * @param d The delimiter. 3975 * @return The delimited string. If <c>tokens</c> is <jk>null</jk>, returns <jk>null</jk>. 3976 */ 3977 public static String joine(List<?> tokens, char d) { 3978 if (tokens == null) 3979 return null; 3980 var as = getEscapeSet(d); 3981 var sb = new StringBuilder(); 3982 for (int i = 0, j = tokens.size(); i < j; i++) { 3983 if (i > 0) 3984 sb.append(d); 3985 sb.append(escapeChars(s(tokens.get(i)), as)); 3986 } 3987 return sb.toString(); 3988 } 3989 3990 /** 3991 * Joins tokens with newlines. 3992 * 3993 * @param tokens The tokens to concatenate. 3994 * @return A string with the specified tokens contatenated with newlines. 
3995 */ 3996 public static String joinnl(Object[] tokens) { 3997 return join(tokens, '\n'); 3998 } 3999 4000 /** 4001 * Converts a string to kebab-case format. 4002 * 4003 * <p> 4004 * Handles various input formats: 4005 * <ul> 4006 * <li>Space-separated: "hello world" → "hello-world"</li> 4007 * <li>CamelCase: "helloWorld" → "hello-world"</li> 4008 * <li>PascalCase: "HelloWorld" → "hello-world"</li> 4009 * <li>Snake_case: "hello_world" → "hello-world"</li> 4010 * </ul> 4011 * 4012 * <h5 class='section'>Example:</h5> 4013 * <p class='bjava'> 4014 * kebabCase(<jk>null</jk>); <jc>// null</jc> 4015 * kebabCase(<js>""</js>); <jc>// ""</jc> 4016 * kebabCase(<js>"hello world"</js>); <jc>// "hello-world"</jc> 4017 * kebabCase(<js>"helloWorld"</js>); <jc>// "hello-world"</jc> 4018 * kebabCase(<js>"HelloWorld"</js>); <jc>// "hello-world"</jc> 4019 * kebabCase(<js>"hello_world"</js>); <jc>// "hello-world"</jc> 4020 * </p> 4021 * 4022 * @param str The string to convert. 4023 * @return The kebab-case string, or <jk>null</jk> if input is <jk>null</jk>. 4024 */ 4025 public static String kebabCase(String str) { 4026 if (isEmpty(str)) 4027 return str; 4028 4029 var words = splitWords(str); 4030 if (words.isEmpty()) 4031 return ""; 4032 4033 var result = new StringBuilder(); 4034 for (var i = 0; i < words.size(); i++) { 4035 if (i > 0) 4036 result.append('-'); 4037 result.append(words.get(i).toLowerCase()); 4038 } 4039 4040 return result.toString(); 4041 } 4042 4043 /** 4044 * Finds the index of the last occurrence of a substring within a string. 4045 * 4046 * <h5 class='section'>Example:</h5> 4047 * <p class='bjava'> 4048 * lastIndexOf(<js>"hello world world"</js>, <js>"world"</js>); <jc>// 12</jc> 4049 * lastIndexOf(<js>"hello world"</js>, <js>"xyz"</js>); <jc>// -1</jc> 4050 * lastIndexOf(<jk>null</jk>, <js>"test"</js>); <jc>// -1</jc> 4051 * </p> 4052 * 4053 * @param str The string to search in. 4054 * @param search The substring to search for. 
4055 * @return The index of the last occurrence, or <c>-1</c> if not found or if either parameter is <jk>null</jk>. 4056 */ 4057 public static int lastIndexOf(String str, String search) { 4058 if (str == null || search == null) 4059 return -1; 4060 return str.lastIndexOf(search); 4061 } 4062 4063 /** 4064 * Finds the index of the last occurrence of a substring within a string, ignoring case. 4065 * 4066 * <h5 class='section'>Example:</h5> 4067 * <p class='bjava'> 4068 * lastIndexOfIgnoreCase(<js>"Hello World World"</js>, <js>"world"</js>); <jc>// 12</jc> 4069 * lastIndexOfIgnoreCase(<js>"Hello World"</js>, <js>"WORLD"</js>); <jc>// 6</jc> 4070 * lastIndexOfIgnoreCase(<js>"hello world"</js>, <js>"xyz"</js>); <jc>// -1</jc> 4071 * </p> 4072 * 4073 * @param str The string to search in. 4074 * @param search The substring to search for. 4075 * @return The index of the last occurrence, or <c>-1</c> if not found or if either parameter is <jk>null</jk>. 4076 */ 4077 public static int lastIndexOfIgnoreCase(String str, String search) { 4078 if (str == null || search == null) 4079 return -1; 4080 return str.toLowerCase().lastIndexOf(search.toLowerCase()); 4081 } 4082 4083 /** 4084 * Returns the last non-whitespace character in the string. 4085 * 4086 * @param s The string to check. 4087 * @return 4088 * The last non-whitespace character, or <c>0</c> if the string is <jk>null</jk>, empty, or composed 4089 * of only whitespace. 4090 */ 4091 public static char lastNonWhitespaceChar(String s) { 4092 if (nn(s)) 4093 for (var i = s.length() - 1; i >= 0; i--) 4094 if (! isWhitespace(s.charAt(i))) 4095 return s.charAt(i); 4096 return 0; 4097 } 4098 4099 /** 4100 * Returns the leftmost characters of a string. 
4101 * 4102 * <h5 class='section'>Example:</h5> 4103 * <p class='bjava'> 4104 * left(<jk>null</jk>, 3); <jc>// null</jc> 4105 * left(<js>""</js>, 3); <jc>// ""</jc> 4106 * left(<js>"hello"</js>, 3); <jc>// "hel"</jc> 4107 * left(<js>"hello"</js>, 10); <jc>// "hello"</jc> 4108 * </p> 4109 * 4110 * @param str The string to get characters from. 4111 * @param len The number of characters to get. 4112 * @return The leftmost characters, or <jk>null</jk> if input is <jk>null</jk>. 4113 */ 4114 public static String left(String str, int len) { 4115 if (str == null) 4116 return null; 4117 if (len < 0) 4118 return ""; 4119 if (len >= str.length()) 4120 return str; 4121 return str.substring(0, len); 4122 } 4123 4124 /** 4125 * Calculates the Levenshtein distance (edit distance) between two strings. 4126 * 4127 * <p> 4128 * The Levenshtein distance is the minimum number of single-character edits (insertions, deletions, or substitutions) required to change one string into another. 4129 * 4130 * <h5 class='section'>Example:</h5> 4131 * <p class='bjava'> 4132 * levenshteinDistance(<js>"kitten"</js>, <js>"sitting"</js>); <jc>// 3</jc> 4133 * levenshteinDistance(<js>"hello"</js>, <js>"hello"</js>); <jc>// 0</jc> 4134 * levenshteinDistance(<js>"abc"</js>, <js>""</js>); <jc>// 3</jc> 4135 * </p> 4136 * 4137 * @param str1 The first string. 4138 * @param str2 The second string. 4139 * @return The Levenshtein distance between the two strings. 
4140 */ 4141 public static int levenshteinDistance(String str1, String str2) { 4142 if (str1 == null) 4143 str1 = ""; 4144 if (str2 == null) 4145 str2 = ""; 4146 4147 var len1 = str1.length(); 4148 var len2 = str2.length(); 4149 4150 // Use dynamic programming with optimized space (only need previous row) 4151 var prev = new int[len2 + 1]; 4152 var curr = new int[len2 + 1]; 4153 4154 // Initialize first row 4155 for (var j = 0; j <= len2; j++) 4156 prev[j] = j; 4157 4158 for (var i = 1; i <= len1; i++) { 4159 curr[0] = i; 4160 for (var j = 1; j <= len2; j++) { 4161 if (str1.charAt(i - 1) == str2.charAt(j - 1)) { 4162 curr[j] = prev[j - 1]; 4163 } else { 4164 curr[j] = 1 + Math.min(Math.min(prev[j], curr[j - 1]), prev[j - 1]); 4165 } 4166 } 4167 // Swap arrays 4168 var temp = prev; 4169 prev = curr; 4170 curr = temp; 4171 } 4172 4173 return prev[len2]; 4174 } 4175 4176 /** 4177 * Counts the number of lines in a string. 4178 * 4179 * <p> 4180 * Counts newline characters. A string ending without a newline is counted as one line. 4181 * 4182 * <h5 class='section'>Example:</h5> 4183 * <p class='bjava'> 4184 * lineCount(<js>"line1\nline2\nline3"</js>); <jc>// 3</jc> 4185 * lineCount(<js>"single line"</js>); <jc>// 1</jc> 4186 * lineCount(<js>"line1\r\nline2"</js>); <jc>// 2</jc> 4187 * </p> 4188 * 4189 * @param str The string to count lines in. Can be <jk>null</jk>. 4190 * @return The number of lines, or <c>0</c> if the string is <jk>null</jk> or empty. 
4191 */ 4192 public static int lineCount(String str) { 4193 if (isEmpty(str)) 4194 return 0; 4195 4196 var count = 1; // At least one line 4197 for (var i = 0; i < str.length(); i++) { 4198 var c = str.charAt(i); 4199 if (c == '\n') { 4200 count++; 4201 } else if (c == '\r') { 4202 // Handle \r\n as a single line break 4203 if (i + 1 < str.length() && str.charAt(i + 1) == '\n') { 4204 i++; // Skip the \n 4205 } 4206 count++; 4207 } 4208 } 4209 4210 return count; 4211 } 4212 4213 // TODO: See if we can remove StringUtils.parseIsoCalendar. 4214 // Currently used by: 4215 // - OpenApiParserSession.java for DATE/DATE_TIME format parsing 4216 // - StringUtils.parseIsoDate() (which wraps this method) 4217 // Investigation needed: Can we replace this with java.time APIs or other standard date parsing? 4218 4219 /** 4220 * Null-safe convenience method for {@link String#toLowerCase()}. 4221 * 4222 * <p> 4223 * Converts the string to lowercase if not null. 4224 * 4225 * @param s The string to convert. 4226 * @return The lowercase string, or <jk>null</jk> if the input was <jk>null</jk>. 4227 * @see #upperCase(String) 4228 * @see Utils#lc(String) 4229 */ 4230 public static String lowerCase(String s) { 4231 return s == null ? null : s.toLowerCase(); 4232 } 4233 4234 /** 4235 * Maps each element of a string array using the specified function. 4236 * 4237 * <p> 4238 * Returns <jk>null</jk> if the array is <jk>null</jk>. 4239 * Returns an array with <jk>null</jk> elements if the function is <jk>null</jk> or returns <jk>null</jk>. 
4240 * 4241 * <h5 class='section'>Examples:</h5> 4242 * <p class='bjava'> 4243 * String[] <jv>array</jv> = {<js>"foo"</js>, <js>"bar"</js>, <js>"baz"</js>}; 4244 * String[] <jv>uppercased</jv> = map(<jv>array</jv>, String::toUpperCase); 4245 * <jc>// Returns: ["FOO", "BAR", "BAZ"]</jc> 4246 * 4247 * String[] <jv>prefixed</jv> = map(<jv>array</jv>, s -> <js>"prefix-"</js> + s); 4248 * <jc>// Returns: ["prefix-foo", "prefix-bar", "prefix-baz"]</jc> 4249 * </p> 4250 * 4251 * @param array The array to map. Can be <jk>null</jk>. 4252 * @param mapper The function to apply to each element. Can be <jk>null</jk>. 4253 * @return A new array with the mapped elements, or <jk>null</jk> if the array was <jk>null</jk>. 4254 */ 4255 public static String[] mapped(String[] array, Function<String,String> mapper) { 4256 if (array == null) 4257 return null; // NOSONAR - Intentional. 4258 if (mapper == null) 4259 return Arrays.copyOf(array, array.length); 4260 return Arrays.stream(array).map(mapper).toArray(String[]::new); 4261 } 4262 4263 /** 4264 * Checks if a string matches a regular expression pattern. 4265 * 4266 * <h5 class='section'>Example:</h5> 4267 * <p class='bjava'> 4268 * matches(<js>"12345"</js>, <js>"\\d+"</js>); <jc>// true</jc> 4269 * matches(<js>"abc123"</js>, <js>"^[a-z]+\\d+$"</js>); <jc>// true</jc> 4270 * matches(<js>"abc"</js>, <js>"\\d+"</js>); <jc>// false</jc> 4271 * </p> 4272 * 4273 * @param str The string to check. 4274 * @param regex The regular expression pattern. 4275 * @return <jk>true</jk> if the string matches the pattern, <jk>false</jk> otherwise. 4276 * @throws PatternSyntaxException If the regex pattern is invalid. 4277 */ 4278 public static boolean matches(String str, String regex) { 4279 if (str == null || regex == null) 4280 return false; 4281 return str.matches(regex); 4282 } 4283 4284 /** 4285 * Generates a Metaphone code for a string. 4286 * 4287 * <p> 4288 * Metaphone is a phonetic algorithm that produces codes representing how words sound. 
/**
 * Generates a Metaphone code for a string.
 *
 * <p>
 * Metaphone is a phonetic algorithm that produces codes representing how words sound.
 * It's more accurate than Soundex for English words.
 *
 * <p>
 * The input is upper-cased and everything other than <c>A-Z</c> is discarded before encoding.
 * Encoding stops once the code has reached four characters; because <c>X</c> contributes two characters
 * (<js>"KS"</js>), a code can end up five characters long.
 * The digit <c>0</c> in a code stands for the "TH" sound.
 *
 * <h5 class='section'>Example:</h5>
 * <p class='bjava'>
 * 	metaphone(<js>"Smith"</js>);   <jc>// "SM0"</jc>
 * 	metaphone(<js>"Smythe"</js>);  <jc>// "SM0"</jc>
 * 	metaphone(<js>"Robert"</js>);  <jc>// "RBRT"</jc>
 * </p>
 *
 * @param str The string to generate a Metaphone code for.  Can be <jk>null</jk>.
 * @return
 * 	The Metaphone code, <jk>null</jk> if input is <jk>null</jk> or empty, or an empty string if the
 * 	input contains no letters in the range A-Z after upper-casing.
 */
public static String metaphone(String str) {
	if (isEmpty(str))
		return null;

	// Only A-Z take part in the encoding.
	// NOTE(review): toUpperCase() uses the default locale - confirm that is intended.
	var upper = str.toUpperCase().replaceAll("[^A-Z]", "");
	if (upper.isEmpty())
		return "";

	var result = new StringBuilder();
	var i = 0;
	var len = upper.length();

	// Handle initial characters
	if (upper.startsWith("KN") || upper.startsWith("GN") || upper.startsWith("PN") || upper.startsWith("AE") || upper.startsWith("WR")) {
		// The first letter of these pairs is skipped; encoding starts at the second letter.
		i = 1;
	} else if (upper.startsWith("X")) {
		// Leading X is encoded as S.
		result.append('S');
		i = 1;
	} else if (upper.startsWith("WH")) {
		// Leading WH is encoded as W; both letters are consumed.
		result.append('W');
		i = 2;
	}

	// Process remaining characters
	while (i < len && result.length() < 4) {
		var c = upper.charAt(i);
		// Neighbouring letters, with '\0' standing in for "no such letter".
		var prev = i > 0 ? upper.charAt(i - 1) : '\0';
		var next = i < len - 1 ? upper.charAt(i + 1) : '\0';
		var next2 = i < len - 2 ? upper.charAt(i + 2) : '\0';

		// Skip duplicates (except C)
		if (c == prev && c != 'C') {
			i++;
			continue;
		}

		// Letters without a case below (the vowels A, E, I, O, U) fall through to 'default'
		// and contribute nothing.  Branches that do an extra i++ consume the following letter too.
		switch (c) {
			case 'B':
				// B is dropped only when it is the last letter and follows M.
				if (prev != 'M' || next != '\0')
					result.append('B');
				break;
			case 'C':
				if (next == 'H') {
					// CH -> X, except after S where it is K.
					if (prev == 'S')
						result.append('K');
					else
						result.append('X');
					i++;
				} else if (next == 'I' || next == 'E' || next == 'Y') {
					result.append('S');
				} else {
					result.append('K');
				}
				break;
			case 'D':
				// DGE / DGI / DGY -> J (the G is consumed as well); otherwise D -> T.
				if (next == 'G' && (next2 == 'E' || next2 == 'I' || next2 == 'Y')) {
					result.append('J');
					i++;
				} else {
					result.append('T');
				}
				break;
			case 'F':
			case 'J':
			case 'L':
			case 'M':
			case 'N':
			case 'R':
				// These letters encode as themselves.
				result.append(c);
				break;
			case 'G':
				if (next == 'H' && (next2 == 'A' || next2 == 'E' || next2 == 'I' || next2 == 'O' || next2 == 'U')) {
					// Silent GH
				} else if (next == 'N' && (next2 == 'E' || next2 == 'D')) {
					// Silent GN
				} else if ((next == 'E' || next == 'I' || next == 'Y') && prev != 'G') {
					result.append('J');
				} else {
					result.append('K');
				}
				break;
			case 'H':
				// H is dropped only when both neighbours are in VOWEL.
				// VOWEL is defined elsewhere in this class - presumably the vowel letters; confirm.
				if (! VOWEL.contains(prev) || ! VOWEL.contains(next))
					result.append('H');
				break;
			case 'K':
				// K after C is not encoded (the C already produced a code).
				if (prev != 'C')
					result.append('K');
				break;
			case 'P':
				// PH -> F (the H is consumed as well).
				if (next == 'H') {
					result.append('F');
					i++;
				} else {
					result.append('P');
				}
				break;
			case 'Q':
				result.append('K');
				break;
			case 'S':
				// SH, SIO and SIA -> X (the following letter is consumed as well).
				if (next == 'H') {
					result.append('X');
					i++;
				} else if (next == 'I' && (next2 == 'O' || next2 == 'A')) {
					result.append('X');
					i++;
				} else {
					result.append('S');
				}
				break;
			case 'T':
				if (next == 'H') {
					result.append('0'); // TH sound
					i++;
				} else if (next == 'I' && (next2 == 'O' || next2 == 'A')) {
					result.append('X');
					i++;
				} else {
					result.append('T');
				}
				break;
			case 'V':
				result.append('F');
				break;
			case 'W', 'Y':
				// W and Y are kept only when the next letter is in VOWEL.
				if (VOWEL.contains(next))
					result.append(c);
				break;
			case 'X':
				// A leading X is consumed by the initial-characters handling above, so i is never 0 here.
				result.append("KS");
				break;
			case 'Z':
				result.append('S');
				break;
			default:
				break;
		}
		i++;
	}

	// If nothing was encoded (e.g. the input was all vowels), fall back to the first letter.
	return result.length() > 0 ? result.toString() : upper.substring(0, Math.min(1, upper.length()));
}
4461 */ 4462 public static String mid(String str, int pos, int len) { 4463 if (str == null) 4464 return null; 4465 if (pos < 0 || len < 0) 4466 return ""; 4467 if (pos >= str.length()) 4468 return ""; 4469 int end = Math.min(pos + len, str.length()); 4470 return str.substring(pos, end); 4471 } 4472 4473 /** 4474 * Finds the most frequent character in a string. 4475 * 4476 * <p> 4477 * Returns the character that appears most often. If multiple characters have the same 4478 * frequency, returns the first one encountered. 4479 * 4480 * <h5 class='section'>Example:</h5> 4481 * <p class='bjava'> 4482 * mostFrequentChar(<js>"hello"</js>); <jc>// 'l'</jc> 4483 * mostFrequentChar(<js>"aabbcc"</js>); <jc>// 'a' (first encountered)</jc> 4484 * </p> 4485 * 4486 * @param str The string to analyze. Can be <jk>null</jk>. 4487 * @return The most frequent character, or <c>'\0'</c> if the string is <jk>null</jk> or empty. 4488 */ 4489 public static char mostFrequentChar(String str) { 4490 if (isEmpty(str)) 4491 return '\0'; 4492 4493 var charCounts = new int[Character.MAX_VALUE + 1]; 4494 var maxCount = 0; 4495 var maxChar = '\0'; 4496 4497 // Count occurrences of each character 4498 for (var i = 0; i < str.length(); i++) { 4499 var c = str.charAt(i); 4500 charCounts[c]++; 4501 if (charCounts[c] > maxCount) { 4502 maxCount = charCounts[c]; 4503 maxChar = c; 4504 } 4505 } 4506 4507 return maxChar; 4508 } 4509 4510 /** 4511 * Performs natural string comparison that handles numbers correctly. 4512 * 4513 * <p> 4514 * Compares strings in a way that numbers are compared numerically rather than lexicographically. 4515 * For example, "file2.txt" comes before "file10.txt" in natural order. 
4516 * 4517 * <h5 class='section'>Example:</h5> 4518 * <p class='bjava'> 4519 * naturalCompare(<js>"file2.txt"</js>, <js>"file10.txt"</js>); <jc>// negative (2 < 10)</jc> 4520 * naturalCompare(<js>"file10.txt"</js>, <js>"file2.txt"</js>); <jc>// positive (10 > 2)</jc> 4521 * naturalCompare(<js>"file1.txt"</js>, <js>"file1.txt"</js>); <jc>// 0 (equal)</jc> 4522 * </p> 4523 * 4524 * @param str1 The first string. 4525 * @param str2 The second string. 4526 * @return A negative integer, zero, or a positive integer as the first string is less than, equal to, or greater than the second. 4527 */ 4528 public static int naturalCompare(String str1, String str2) { 4529 if (str1 == str2) 4530 return 0; 4531 if (str1 == null) 4532 return -1; 4533 if (str2 == null) 4534 return 1; 4535 4536 var len1 = str1.length(); 4537 var len2 = str2.length(); 4538 var i1 = 0; 4539 var i2 = 0; 4540 4541 while (i1 < len1 && i2 < len2) { 4542 var c1 = str1.charAt(i1); 4543 var c2 = str2.charAt(i2); 4544 4545 // If both are digits, compare numerically 4546 if (DIGIT.contains(c1) && DIGIT.contains(c2)) { 4547 // Skip leading zeros 4548 while (i1 < len1 && str1.charAt(i1) == '0') 4549 i1++; 4550 while (i2 < len2 && str2.charAt(i2) == '0') 4551 i2++; 4552 4553 // Find end of number sequences 4554 var end1 = i1; 4555 var end2 = i2; 4556 while (end1 < len1 && DIGIT.contains(str1.charAt(end1))) 4557 end1++; 4558 while (end2 < len2 && DIGIT.contains(str2.charAt(end2))) 4559 end2++; 4560 4561 // Compare lengths first (longer number is larger) 4562 var lenNum1 = end1 - i1; 4563 var lenNum2 = end2 - i2; 4564 if (lenNum1 != lenNum2) 4565 return lenNum1 - lenNum2; 4566 4567 // Same length, compare digit by digit 4568 for (var j = 0; j < lenNum1; j++) { 4569 var d1 = str1.charAt(i1 + j); 4570 var d2 = str2.charAt(i2 + j); 4571 if (d1 != d2) 4572 return d1 - d2; 4573 } 4574 4575 i1 = end1; 4576 i2 = end2; 4577 } else { 4578 // Compare characters (case-insensitive) 4579 var cmp = Character.toLowerCase(c1) - 
Character.toLowerCase(c2); 4580 if (cmp != 0) 4581 return cmp; 4582 i1++; 4583 i2++; 4584 } 4585 } 4586 4587 return len1 - len2; 4588 } 4589 4590 /** 4591 * Normalizes Unicode characters in a string. 4592 * 4593 * <p> 4594 * Uses Unicode normalization form NFD (Canonical Decomposition). 4595 * 4596 * <h5 class='section'>Example:</h5> 4597 * <p class='bjava'> 4598 * normalizeUnicode(<js>"café"</js>); 4599 * <jc>// Normalized form</jc> 4600 * </p> 4601 * 4602 * @param str The string to normalize. Can be <jk>null</jk>. 4603 * @return The normalized string, or <jk>null</jk> if input is <jk>null</jk>. 4604 */ 4605 public static String normalizeUnicode(String str) { 4606 if (str == null) 4607 return null; 4608 return Normalizer.normalize(str, Normalizer.Form.NFD); 4609 } 4610 4611 /** 4612 * Normalizes all whitespace in a string to single spaces. 4613 * 4614 * <h5 class='section'>Example:</h5> 4615 * <p class='bjava'> 4616 * normalizeWhitespace(<js>"hello \t\n world"</js>); <jc>// "hello world"</jc> 4617 * normalizeWhitespace(<js>" hello world "</js>); <jc>// "hello world"</jc> 4618 * </p> 4619 * 4620 * @param str The string to normalize. 4621 * @return The normalized string, or <jk>null</jk> if input is <jk>null</jk>. 4622 */ 4623 public static String normalizeWhitespace(String str) { 4624 if (str == null) 4625 return null; 4626 return str.replaceAll("\\s+", " ").trim(); 4627 } 4628 4629 /** 4630 * Checks if a string does not contain the specified character. 4631 * 4632 * <p> 4633 * This is the inverse of {@link #contains(String, char)}. 4634 * Returns <jk>true</jk> if the string is <jk>null</jk> or does not contain the character. 4635 * 4636 * <h5 class='section'>Example:</h5> 4637 * <p class='bjava'> 4638 * notContains(<js>"Hello World"</js>, <js>'x'</js>); <jc>// true</jc> 4639 * notContains(<js>"Hello World"</js>, <js>'o'</js>); <jc>// false</jc> 4640 * notContains(<jk>null</jk>, <js>'a'</js>); <jc>// true</jc> 4641 * </p> 4642 * 4643 * @param s The string to check. 
4644 * @param c The character to check for. 4645 * @return <jk>true</jk> if the string does not contain the specified character. 4646 * @see #contains(String, char) 4647 * @see #notContainsAny(String, char...) 4648 */ 4649 public static boolean notContains(String s, char c) { 4650 return ! contains(s, c); 4651 } 4652 4653 /** 4654 * Checks if a string does not contain the specified substring. 4655 * 4656 * <p> 4657 * This is the inverse of {@link #contains(String, CharSequence)}. 4658 * Returns <jk>true</jk> if the string is <jk>null</jk> or does not contain the substring. 4659 * 4660 * <h5 class='section'>Example:</h5> 4661 * <p class='bjava'> 4662 * notContains(<js>"Hello World"</js>, <js>"Foo"</js>); <jc>// true</jc> 4663 * notContains(<js>"Hello World"</js>, <js>"World"</js>); <jc>// false</jc> 4664 * notContains(<jk>null</jk>, <js>"Hello"</js>); <jc>// true</jc> 4665 * </p> 4666 * 4667 * @param s The string to check. 4668 * @param substring The substring to check for. 4669 * @return <jk>true</jk> if the string does not contain the specified substring. 4670 * @see #contains(String, CharSequence) 4671 * @see #notContainsAny(String, CharSequence...) 4672 */ 4673 public static boolean notContains(String s, CharSequence substring) { 4674 return ! contains(s, substring); 4675 } 4676 4677 /** 4678 * Checks if a string does not contain the specified substring. 4679 * 4680 * <p> 4681 * This is the inverse of {@link #contains(String, String)}. 4682 * Returns <jk>true</jk> if the string is <jk>null</jk> or does not contain the substring. 4683 * 4684 * <h5 class='section'>Example:</h5> 4685 * <p class='bjava'> 4686 * notContains(<js>"Hello World"</js>, <js>"Foo"</js>); <jc>// true</jc> 4687 * notContains(<js>"Hello World"</js>, <js>"World"</js>); <jc>// false</jc> 4688 * notContains(<jk>null</jk>, <js>"Hello"</js>); <jc>// true</jc> 4689 * </p> 4690 * 4691 * @param s The string to check. 4692 * @param substring The substring to check for. 
4693 * @return <jk>true</jk> if the string does not contain the specified substring. 4694 * @see #contains(String, String) 4695 * @see #notContainsAny(String, String...) 4696 */ 4697 public static boolean notContains(String s, String substring) { 4698 return ! contains(s, substring); 4699 } 4700 4701 /** 4702 * Checks if a string does not contain all of the specified characters. 4703 * 4704 * <p> 4705 * This is the inverse of {@link #containsAll(String, char...)}. 4706 * Returns <jk>true</jk> if: 4707 * <ul> 4708 * <li>The string is <jk>null</jk></li> 4709 * <li>The values array is <jk>null</jk> or empty</li> 4710 * <li>Any of the specified characters are not found in the string</li> 4711 * </ul> 4712 * 4713 * <h5 class='section'>Example:</h5> 4714 * <p class='bjava'> 4715 * notContainsAll(<js>"Hello World"</js>, <js>'H'</js>, <js>'x'</js>); <jc>// true (missing 'x')</jc> 4716 * notContainsAll(<js>"Hello World"</js>, <js>'H'</js>, <js>'e'</js>, <js>'l'</js>); <jc>// false (contains all)</jc> 4717 * notContainsAll(<jk>null</jk>, <js>'a'</js>); <jc>// true</jc> 4718 * </p> 4719 * 4720 * @param s The string to check. 4721 * @param values The characters to check for. 4722 * @return <jk>true</jk> if the string does not contain all of the specified characters. 4723 * @see #containsAll(String, char...) 4724 */ 4725 public static boolean notContainsAll(String s, char...values) { 4726 return ! containsAll(s, values); 4727 } 4728 4729 /** 4730 * Checks if a string does not contain all of the specified substrings. 4731 * 4732 * <p> 4733 * This is the inverse of {@link #containsAll(String, CharSequence...)}. 
4734 * Returns <jk>true</jk> if: 4735 * <ul> 4736 * <li>The string is <jk>null</jk></li> 4737 * <li>The values array is <jk>null</jk> or empty</li> 4738 * <li>Any of the specified substrings are not found in the string</li> 4739 * </ul> 4740 * 4741 * <h5 class='section'>Example:</h5> 4742 * <p class='bjava'> 4743 * notContainsAll(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"Foo"</js>); <jc>// true (missing "Foo")</jc> 4744 * notContainsAll(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"World"</js>); <jc>// false (contains all)</jc> 4745 * notContainsAll(<jk>null</jk>, <js>"Hello"</js>); <jc>// true</jc> 4746 * </p> 4747 * 4748 * @param s The string to check. 4749 * @param values The substrings to check for. 4750 * @return <jk>true</jk> if the string does not contain all of the specified substrings. 4751 * @see #containsAll(String, CharSequence...) 4752 */ 4753 public static boolean notContainsAll(String s, CharSequence...values) { 4754 return ! containsAll(s, values); 4755 } 4756 4757 /** 4758 * Checks if a string does not contain all of the specified substrings. 4759 * 4760 * <p> 4761 * This is the inverse of {@link #containsAll(String, String...)}. 4762 * Returns <jk>true</jk> if: 4763 * <ul> 4764 * <li>The string is <jk>null</jk></li> 4765 * <li>The values array is <jk>null</jk> or empty</li> 4766 * <li>Any of the specified substrings are not found in the string</li> 4767 * </ul> 4768 * 4769 * <h5 class='section'>Example:</h5> 4770 * <p class='bjava'> 4771 * notContainsAll(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"Foo"</js>); <jc>// true (missing "Foo")</jc> 4772 * notContainsAll(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"World"</js>); <jc>// false (contains all)</jc> 4773 * notContainsAll(<jk>null</jk>, <js>"Hello"</js>); <jc>// true</jc> 4774 * </p> 4775 * 4776 * @param s The string to check. 4777 * @param values The substrings to check for. 4778 * @return <jk>true</jk> if the string does not contain all of the specified substrings. 
4779 * @see #containsAll(String, String...) 4780 */ 4781 public static boolean notContainsAll(String s, String...values) { 4782 return ! containsAll(s, values); 4783 } 4784 4785 /** 4786 * Checks if a string does not contain any of the specified characters. 4787 * 4788 * <p> 4789 * This is the inverse of {@link #containsAny(String, char...)}. 4790 * Returns <jk>true</jk> if: 4791 * <ul> 4792 * <li>The string is <jk>null</jk></li> 4793 * <li>The values array is <jk>null</jk> or empty</li> 4794 * <li>None of the specified characters are found in the string</li> 4795 * </ul> 4796 * 4797 * <h5 class='section'>Example:</h5> 4798 * <p class='bjava'> 4799 * notContainsAny(<js>"Hello World"</js>, <js>'x'</js>, <js>'y'</js>); <jc>// true</jc> 4800 * notContainsAny(<js>"Hello World"</js>, <js>'o'</js>, <js>'x'</js>); <jc>// false (contains 'o')</jc> 4801 * notContainsAny(<jk>null</jk>, <js>'a'</js>); <jc>// true</jc> 4802 * </p> 4803 * 4804 * @param s The string to check. 4805 * @param values The characters to check for. 4806 * @return <jk>true</jk> if the string does not contain any of the specified characters. 4807 * @see #containsAny(String, char...) 4808 * @see #notContainsAll(String, char...) 4809 */ 4810 public static boolean notContainsAny(String s, char...values) { 4811 return ! containsAny(s, values); 4812 } 4813 4814 /** 4815 * Checks if a string does not contain any of the specified substrings. 4816 * 4817 * <p> 4818 * This is the inverse of {@link #containsAny(String, CharSequence...)}. 
4819 * Returns <jk>true</jk> if: 4820 * <ul> 4821 * <li>The string is <jk>null</jk></li> 4822 * <li>The values array is <jk>null</jk> or empty</li> 4823 * <li>None of the specified substrings are found in the string</li> 4824 * </ul> 4825 * 4826 * <h5 class='section'>Example:</h5> 4827 * <p class='bjava'> 4828 * notContainsAny(<js>"Hello World"</js>, <js>"Foo"</js>, <js>"Bar"</js>); <jc>// true</jc> 4829 * notContainsAny(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"Foo"</js>); <jc>// false (contains "Hello")</jc> 4830 * notContainsAny(<jk>null</jk>, <js>"Hello"</js>); <jc>// true</jc> 4831 * </p> 4832 * 4833 * @param s The string to check. 4834 * @param values The substrings to check for. 4835 * @return <jk>true</jk> if the string does not contain any of the specified substrings. 4836 * @see #containsAny(String, CharSequence...) 4837 * @see #notContainsAll(String, CharSequence...) 4838 */ 4839 public static boolean notContainsAny(String s, CharSequence...values) { 4840 return ! containsAny(s, values); 4841 } 4842 4843 /** 4844 * Checks if a string does not contain any of the specified substrings. 4845 * 4846 * <p> 4847 * This is the inverse of {@link #containsAny(String, String...)}. 4848 * Returns <jk>true</jk> if: 4849 * <ul> 4850 * <li>The string is <jk>null</jk></li> 4851 * <li>The values array is <jk>null</jk> or empty</li> 4852 * <li>None of the specified substrings are found in the string</li> 4853 * </ul> 4854 * 4855 * <h5 class='section'>Example:</h5> 4856 * <p class='bjava'> 4857 * notContainsAny(<js>"Hello World"</js>, <js>"Foo"</js>, <js>"Bar"</js>); <jc>// true</jc> 4858 * notContainsAny(<js>"Hello World"</js>, <js>"Hello"</js>, <js>"Foo"</js>); <jc>// false (contains "Hello")</jc> 4859 * notContainsAny(<jk>null</jk>, <js>"Hello"</js>); <jc>// true</jc> 4860 * </p> 4861 * 4862 * @param s The string to check. 4863 * @param values The substrings to check for. 4864 * @return <jk>true</jk> if the string does not contain any of the specified substrings. 
4865 * @see #containsAny(String, String...) 4866 * @see #notContainsAll(String, String...) 4867 */ 4868 public static boolean notContainsAny(String s, String...values) { 4869 return ! containsAny(s, values); 4870 } 4871 4872 /** 4873 * Returns an obfuscated version of the specified string. 4874 * 4875 * @param s The string to obfuscate. 4876 * @return The obfuscated string with most characters replaced by asterisks. 4877 */ 4878 public static String obfuscate(String s) { 4879 if (s == null || s.length() < 2) 4880 return "*"; 4881 return s.substring(0, 1) + s.substring(1).replaceAll(".", "*"); // NOSONAR 4882 } 4883 4884 /** 4885 * Converts a number to its ordinal form (1st, 2nd, 3rd, 4th, etc.). 4886 * 4887 * <h5 class='section'>Example:</h5> 4888 * <p class='bjava'> 4889 * ordinal(<js>1</js>); <jc>// "1st"</jc> 4890 * ordinal(<js>2</js>); <jc>// "2nd"</jc> 4891 * ordinal(<js>3</js>); <jc>// "3rd"</jc> 4892 * ordinal(<js>4</js>); <jc>// "4th"</jc> 4893 * ordinal(<js>11</js>); <jc>// "11th"</jc> 4894 * ordinal(<js>21</js>); <jc>// "21st"</jc> 4895 * </p> 4896 * 4897 * @param number The number to convert. 4898 * @return The ordinal string representation of the number. 4899 */ 4900 public static String ordinal(int number) { 4901 var abs = Math.abs(number); 4902 var suffix = "th"; 4903 4904 // Special cases for 11, 12, 13 (all use "th") 4905 if (abs % 100 != 11 && abs % 100 != 12 && abs % 100 != 13) { 4906 var lastDigit = abs % 10; 4907 if (lastDigit == 1) 4908 suffix = "st"; 4909 else if (lastDigit == 2) 4910 suffix = "nd"; 4911 else if (lastDigit == 3) 4912 suffix = "rd"; 4913 } 4914 4915 return number + suffix; 4916 } 4917 4918 /** 4919 * Center pads a string with a specified character. 
4920 * 4921 * <h5 class='section'>Example:</h5> 4922 * <p class='bjava'> 4923 * padCenter(<jk>null</jk>, 5, <js>' '</js>); <jc>// " "</jc> 4924 * padCenter(<js>""</js>, 5, <js>' '</js>); <jc>// " "</jc> 4925 * padCenter(<js>"hi"</js>, 6, <js>' '</js>); <jc>// " hi "</jc> 4926 * padCenter(<js>"hi"</js>, 7, <js>' '</js>); <jc>// " hi "</jc> 4927 * padCenter(<js>"hello"</js>, 3, <js>' '</js>); <jc>// "hello"</jc> 4928 * </p> 4929 * 4930 * @param str The string to pad. 4931 * @param size The desired total string length. 4932 * @param padChar The character to pad with. 4933 * @return The center-padded string. 4934 */ 4935 public static String padCenter(String str, int size, char padChar) { 4936 if (str == null) 4937 str = ""; 4938 int pads = size - str.length(); 4939 if (pads <= 0) 4940 return str; 4941 int rightPads = pads / 2; 4942 int leftPads = pads - rightPads; 4943 return String.valueOf(padChar).repeat(leftPads) + str + String.valueOf(padChar).repeat(rightPads); 4944 } 4945 4946 /** 4947 * Left pads a string with a specified character. 4948 * 4949 * <h5 class='section'>Example:</h5> 4950 * <p class='bjava'> 4951 * padLeft(<jk>null</jk>, 5, <js>' '</js>); <jc>// " "</jc> 4952 * padLeft(<js>""</js>, 5, <js>' '</js>); <jc>// " "</jc> 4953 * padLeft(<js>"hello"</js>, 8, <js>' '</js>); <jc>// " hello"</jc> 4954 * padLeft(<js>"hello"</js>, 3, <js>' '</js>); <jc>// "hello"</jc> 4955 * padLeft(<js>"123"</js>, 5, <js>'0'</js>); <jc>// "00123"</jc> 4956 * </p> 4957 * 4958 * @param str The string to pad. 4959 * @param size The desired total string length. 4960 * @param padChar The character to pad with. 4961 * @return The left-padded string. 4962 */ 4963 public static String padLeft(String str, int size, char padChar) { 4964 if (str == null) 4965 str = ""; 4966 int pads = size - str.length(); 4967 if (pads <= 0) 4968 return str; 4969 return String.valueOf(padChar).repeat(pads) + str; 4970 } 4971 4972 /** 4973 * Right pads a string with a specified character. 
4974 * 4975 * <h5 class='section'>Example:</h5> 4976 * <p class='bjava'> 4977 * padRight(<jk>null</jk>, 5, <js>' '</js>); <jc>// " "</jc> 4978 * padRight(<js>""</js>, 5, <js>' '</js>); <jc>// " "</jc> 4979 * padRight(<js>"hello"</js>, 8, <js>' '</js>); <jc>// "hello "</jc> 4980 * padRight(<js>"hello"</js>, 3, <js>' '</js>); <jc>// "hello"</jc> 4981 * padRight(<js>"123"</js>, 5, <js>'0'</js>); <jc>// "12300"</jc> 4982 * </p> 4983 * 4984 * @param str The string to pad. 4985 * @param size The desired total string length. 4986 * @param padChar The character to pad with. 4987 * @return The right-padded string. 4988 */ 4989 public static String padRight(String str, int size, char padChar) { 4990 if (str == null) 4991 str = ""; 4992 int pads = size - str.length(); 4993 if (pads <= 0) 4994 return str; 4995 return str + String.valueOf(padChar).repeat(pads); 4996 } 4997 4998 /** 4999 * Converts a <c>String</c> to a <c>Character</c> 5000 * 5001 * @param o The string to convert. 5002 * @return The first character of the string if the string is of length 1, or <jk>null</jk> if the string is <jk>null</jk> or empty. 5003 * @throws IllegalArgumentException If the string length is not 1. 5004 */ 5005 public static Character parseCharacter(Object o) { 5006 if (o == null) 5007 return null; 5008 var s = o.toString(); 5009 if (s.isEmpty()) 5010 return null; 5011 if (s.length() == 1) 5012 return s.charAt(0); 5013 throw illegalArg("Invalid character: ''{0}''", s); 5014 } 5015 5016 /** 5017 * Same as {@link Float#parseFloat(String)} but removes any underscore characters first. 5018 * 5019 * <p>Allows for better readability of numeric literals (e.g., <js>"1_000.5"</js>). 5020 * 5021 * @param value The string to parse. 5022 * @return The parsed float value. 5023 * @throws NumberFormatException If the string cannot be parsed. 5024 * @throws NullPointerException If the string is <jk>null</jk>. 
5025 */ 5026 public static float parseFloat(String value) { 5027 return Float.parseFloat(StringUtils.removeUnderscores(value)); 5028 } 5029 5030 /** 5031 * Same as {@link Integer#parseInt(String)} but removes any underscore characters first. 5032 * 5033 * <p>Allows for better readability of numeric literals (e.g., <js>"1_000_000"</js>). 5034 * 5035 * @param value The string to parse. 5036 * @return The parsed integer value. 5037 * @throws NumberFormatException If the string cannot be parsed. 5038 * @throws NullPointerException If the string is <jk>null</jk>. 5039 */ 5040 public static int parseInt(String value) { 5041 return Integer.parseInt(StringUtils.removeUnderscores(value)); 5042 } 5043 5044 /** 5045 * Converts a string containing a possible multiplier suffix to an integer. 5046 * 5047 * <p> 5048 * The string can contain any of the following multiplier suffixes: 5049 * <ul> 5050 * <li><js>"K"</js> - x 1024 5051 * <li><js>"M"</js> - x 1024*1024 5052 * <li><js>"G"</js> - x 1024*1024*1024 5053 * <li><js>"k"</js> - x 1000 5054 * <li><js>"m"</js> - x 1000*1000 5055 * <li><js>"g"</js> - x 1000*1000*1000 5056 * </ul> 5057 * 5058 * @param s The string to parse. 5059 * @return The parsed value. 5060 */ 5061 public static int parseIntWithSuffix(String s) { 5062 assertArgNotNull("s", s); 5063 var m = multiplierInt(s); 5064 if (m == 1) 5065 return Integer.decode(s); 5066 return Integer.decode(s.substring(0, s.length() - 1).trim()) * m; // NOSONAR - NPE not possible here. 5067 } 5068 5069 /** 5070 * Same as {@link Long#parseLong(String)} but removes any underscore characters first. 5071 * 5072 * <p>Allows for better readability of numeric literals (e.g., <js>"1_000_000"</js>). 5073 * 5074 * @param value The string to parse. 5075 * @return The parsed long value. 5076 * @throws NumberFormatException If the string cannot be parsed. 5077 * @throws NullPointerException If the string is <jk>null</jk>. 
5078 */ 5079 public static long parseLong(String value) { 5080 return Long.parseLong(StringUtils.removeUnderscores(value)); 5081 } 5082 5083 /** 5084 * Converts a string containing a possible multiplier suffix to a long. 5085 * 5086 * <p> 5087 * The string can contain any of the following multiplier suffixes: 5088 * <ul> 5089 * <li><js>"K"</js> - x 1024 5090 * <li><js>"M"</js> - x 1024*1024 5091 * <li><js>"G"</js> - x 1024*1024*1024 5092 * <li><js>"T"</js> - x 1024*1024*1024*1024 5093 * <li><js>"P"</js> - x 1024*1024*1024*1024*1024 5094 * <li><js>"k"</js> - x 1000 5095 * <li><js>"m"</js> - x 1000*1000 5096 * <li><js>"g"</js> - x 1000*1000*1000 5097 * <li><js>"t"</js> - x 1000*1000*1000*1000 5098 * <li><js>"p"</js> - x 1000*1000*1000*1000*1000 5099 * </ul> 5100 * 5101 * @param s The string to parse. 5102 * @return The parsed value. 5103 */ 5104 public static long parseLongWithSuffix(String s) { 5105 assertArgNotNull("s", s); 5106 var m = multiplierLong(s); 5107 if (m == 1) { 5108 // If multiplier is 1, try to decode the whole string 5109 // This will throw NumberFormatException if it contains a suffix character 5110 return Long.decode(s); 5111 } 5112 var baseStr = s.substring(0, s.length() - 1).trim(); 5113 var base = Long.decode(baseStr); // NOSONAR - NPE not possible here. 5114 try { 5115 return Math.multiplyExact(base, m); 5116 } catch (@SuppressWarnings("unused") ArithmeticException e) { 5117 throw new NumberFormatException("Value " + s + " exceeds Long.MAX_VALUE"); 5118 } 5119 } 5120 5121 /** 5122 * Parses a string containing key-value pairs into a map. 5123 * 5124 * <p> 5125 * Splits the string by the entry delimiter, then splits each entry by the key-value delimiter. 
5126 * 5127 * <h5 class='section'>Example:</h5> 5128 * <p class='bjava'> 5129 * parseMap(<js>"key1=value1,key2=value2"</js>, <js>'='</js>, <js>','</js>, <jk>false</jk>); 5130 * <jc>// {"key1":"value1","key2":"value2"}</jc> 5131 * parseMap(<js>" key1 = value1 ; key2 = value2 "</js>, <js>'='</js>, <js>';'</js>, <jk>true</jk>); 5132 * <jc>// {"key1":"value1","key2":"value2"}</jc> 5133 * </p> 5134 * 5135 * @param str The string to parse. Can be <jk>null</jk>. 5136 * @param keyValueDelimiter The character that separates keys from values. 5137 * @param entryDelimiter The character that separates entries. 5138 * @param trimKeys If <jk>true</jk>, trims whitespace from keys and values. 5139 * @return A map containing the parsed key-value pairs, or an empty map if the string is <jk>null</jk> or empty. 5140 */ 5141 public static Map<String,String> parseMap(String str, char keyValueDelimiter, char entryDelimiter, boolean trimKeys) { 5142 var result = new LinkedHashMap<String,String>(); 5143 if (isEmpty(str)) 5144 return result; 5145 5146 var entries = split(str, entryDelimiter); 5147 for (var entry : entries) { 5148 if (isEmpty(entry)) 5149 continue; 5150 var delimiterIndex = entry.indexOf(keyValueDelimiter); 5151 if (delimiterIndex == -1) { 5152 // No delimiter found, treat entire entry as key with empty value 5153 var key = trimKeys ? entry.trim() : entry; 5154 result.put(key, ""); 5155 } else { 5156 var key = entry.substring(0, delimiterIndex); 5157 var value = entry.substring(delimiterIndex + 1); 5158 if (trimKeys) { 5159 key = key.trim(); 5160 value = value.trim(); 5161 } 5162 result.put(key, value); 5163 } 5164 } 5165 return result; 5166 } 5167 5168 /** 5169 * Parses a number from the specified string. 5170 * 5171 * <p> 5172 * Supports Java 7+ numeric literals with underscores (e.g., <js>"1_000_000"</js>). 5173 * The underscores are automatically removed before parsing. 5174 * 5175 * @param s The string to parse the number from. 
5176 * @param type 5177 * The number type to created. 5178 * Can be any of the following: 5179 * <ul> 5180 * <li> Integer (or <c>int</c> primitive) 5181 * <li> Long (or <c>long</c> primitive) 5182 * <li> Short (or <c>short</c> primitive) 5183 * <li> Byte (or <c>byte</c> primitive) 5184 * <li> Float (or <c>float</c> primitive) 5185 * <li> Double (or <c>double</c> primitive) 5186 * <li> BigInteger 5187 * <li> BigDecimal 5188 * <li> AtomicInteger 5189 * <li> AtomicLong 5190 * </ul> 5191 * If <jk>null</jk> or <c>Number</c>, uses the best guess. 5192 * @return The parsed number, or <jk>null</jk> if the string was null. 5193 */ 5194 public static Number parseNumber(String s, Class<? extends Number> type) { 5195 if (s == null) 5196 return null; 5197 if (s.isEmpty()) 5198 s = "0"; 5199 if (type == null) 5200 type = Number.class; 5201 5202 // Remove underscores (Java 7+ numeric literal support) before parsing 5203 // Note: We do this before type detection to ensure clean parsing 5204 s = s.replace("_", ""); 5205 5206 // Determine the data type if it wasn't specified. 5207 var isAutoDetect = (type == Number.class); 5208 var isDecimal = false; 5209 if (isAutoDetect) { 5210 // If we're auto-detecting, then we use either an Integer, Long, or Double depending on how 5211 // long the string is. 5212 // An integer range is -2,147,483,648 to 2,147,483,647 5213 // An long range is -9,223,372,036,854,775,808 to +9,223,372,036,854,775,807 5214 isDecimal = isDecimal(s); 5215 if (isDecimal) { 5216 if (s.length() > 20) 5217 type = Double.class; 5218 else if (s.length() >= 10) 5219 type = Long.class; 5220 else 5221 type = Integer.class; 5222 } else if (isFloat(s)) 5223 type = Double.class; 5224 else 5225 throw new NumberFormatException(s); 5226 } 5227 5228 if (type == Double.class || type == Double.TYPE) { 5229 var d = Double.valueOf(s); 5230 var f = Float.valueOf(s); 5231 if (isAutoDetect && (! 
isDecimal) && d.toString().equals(f.toString())) 5232 return f; 5233 return d; 5234 } 5235 if (type == Float.class || type == Float.TYPE) 5236 return Float.valueOf(s); 5237 if (type == BigDecimal.class) 5238 return new BigDecimal(s); 5239 if (type == Long.class || type == Long.TYPE || type == AtomicLong.class) { 5240 try { 5241 var l = parseLongWithSuffix(s); 5242 if (type == AtomicLong.class) 5243 return new AtomicLong(l); 5244 if (isAutoDetect && l >= Integer.MIN_VALUE && l <= Integer.MAX_VALUE) { 5245 // This occurs if the string is 10 characters long but is still a valid integer value. 5246 return (int)l; 5247 } 5248 return l; 5249 } catch (NumberFormatException e) { 5250 if (isAutoDetect) { 5251 // This occurs if the string is 20 characters long but still falls outside the range of a valid long. 5252 return Double.valueOf(s); 5253 } 5254 throw e; 5255 } 5256 } 5257 if (type == Integer.class || type == Integer.TYPE) 5258 return Integer.decode(s); 5259 if (type == Short.class || type == Short.TYPE) 5260 return Short.decode(s); 5261 if (type == Byte.class || type == Byte.TYPE) 5262 return Byte.decode(s); 5263 if (type == BigInteger.class) 5264 return new BigInteger(s); 5265 if (type == AtomicInteger.class) 5266 return new AtomicInteger(Integer.decode(s)); 5267 throw new NumberFormatException("Unsupported Number type: " + type.getName()); 5268 } 5269 5270 /** 5271 * Converts a string to PascalCase format. 
5272 * 5273 * <p> 5274 * Handles various input formats: 5275 * <ul> 5276 * <li>Space-separated: "hello world" → "HelloWorld"</li> 5277 * <li>CamelCase: "helloWorld" → "HelloWorld"</li> 5278 * <li>Snake_case: "hello_world" → "HelloWorld"</li> 5279 * <li>Kebab-case: "hello-world" → "HelloWorld"</li> 5280 * </ul> 5281 * 5282 * <h5 class='section'>Example:</h5> 5283 * <p class='bjava'> 5284 * pascalCase(<jk>null</jk>); <jc>// null</jc> 5285 * pascalCase(<js>""</js>); <jc>// ""</jc> 5286 * pascalCase(<js>"hello world"</js>); <jc>// "HelloWorld"</jc> 5287 * pascalCase(<js>"helloWorld"</js>); <jc>// "HelloWorld"</jc> 5288 * pascalCase(<js>"hello_world"</js>); <jc>// "HelloWorld"</jc> 5289 * pascalCase(<js>"hello-world"</js>); <jc>// "HelloWorld"</jc> 5290 * </p> 5291 * 5292 * @param str The string to convert. 5293 * @return The PascalCase string, or <jk>null</jk> if input is <jk>null</jk>. 5294 */ 5295 public static String pascalCase(String str) { 5296 if (isEmpty(str)) 5297 return str; 5298 5299 var words = splitWords(str); 5300 if (words.isEmpty()) 5301 return ""; 5302 5303 var result = new StringBuilder(); 5304 for (var word : words) { 5305 result.append(capitalize(word.toLowerCase())); 5306 } 5307 5308 return result.toString(); 5309 } 5310 5311 /** 5312 * Pluralizes a word based on a count. 5313 * 5314 * <p> 5315 * Simple pluralization that adds "s" to most words, with basic rules for words ending in "s", "x", "z", "ch", "sh" (add "es"), 5316 * and words ending in "y" preceded by a consonant (replace "y" with "ies"). 5317 * 5318 * <h5 class='section'>Example:</h5> 5319 * <p class='bjava'> 5320 * pluralize(<js>"cat"</js>, <js>1</js>); <jc>// "cat"</jc> 5321 * pluralize(<js>"cat"</js>, <js>2</js>); <jc>// "cats"</jc> 5322 * pluralize(<js>"box"</js>, <js>2</js>); <jc>// "boxes"</jc> 5323 * pluralize(<js>"city"</js>, <js>2</js>); <jc>// "cities"</jc> 5324 * </p> 5325 * 5326 * @param word The word to pluralize. 
5327 * @param count The count to determine if pluralization is needed. 5328 * @return The pluralized word if count is not 1, otherwise the original word. 5329 */ 5330 public static String pluralize(String word, int count) { 5331 if (word == null || word.isEmpty()) 5332 return word; 5333 if (count == 1) 5334 return word; 5335 5336 var lower = word.toLowerCase(); 5337 var length = word.length(); 5338 5339 // Words ending in s, x, z, ch, sh -> add "es" 5340 if (lower.endsWith("s") || lower.endsWith("x") || lower.endsWith("z") || lower.endsWith("ch") || lower.endsWith("sh")) { 5341 return word + "es"; 5342 } 5343 5344 // Words ending in "y" preceded by a consonant -> replace "y" with "ies" 5345 if (length > 1 && lower.endsWith("y")) { 5346 var secondLast = lower.charAt(length - 2); 5347 if (! VOWEL.contains(secondLast)) { 5348 return word.substring(0, length - 1) + "ies"; 5349 } 5350 } 5351 5352 // Words ending in "f" or "fe" -> replace with "ves" (basic rule) 5353 if (lower.endsWith("f")) { 5354 return word.substring(0, length - 1) + "ves"; 5355 } 5356 if (lower.endsWith("fe")) { 5357 return word.substring(0, length - 2) + "ves"; 5358 } 5359 5360 // Default: add "s" 5361 return word + "s"; 5362 } 5363 5364 /** 5365 * Generated a random UUID with the specified number of characters. 5366 * 5367 * <p> 5368 * Characters are composed of lower-case ASCII letters and numbers only. 5369 * 5370 * <p> 5371 * This method conforms to the restrictions for hostnames as specified in <a class="doclink" href="https://tools.ietf.org/html/rfc952">RFC 952</a> 5372 * Since each character has 36 possible values, the square approximation formula for the number of generated IDs 5373 * that would produce a 50% chance of collision is: 5374 * <c>sqrt(36^N)</c>. 5375 * Dividing this number by 10 gives you an approximation of the number of generated IDs needed to produce a 5376 * <1% chance of collision. 
5377 * 5378 * <p> 5379 * For example, given 5 characters, the number of generated IDs need to produce a <1% chance of collision would 5380 * be: 5381 * <c>sqrt(36^5)/10=777</c> 5382 * 5383 * @param numchars The number of characters in the generated UUID. 5384 * @return A new random UUID. 5385 */ 5386 public static String random(int numchars) { 5387 var sb = new StringBuilder(numchars); 5388 for (var i = 0; i < numchars; i++) { 5389 var c = RANDOM.nextInt(36) + 97; 5390 if (c > 'z') 5391 c -= ('z' - '0' + 1); 5392 sb.append((char)c); 5393 } 5394 return sb.toString(); 5395 } 5396 5397 /** 5398 * Generates a random alphabetic string of the specified length. 5399 * 5400 * <p> 5401 * Characters are composed of upper-case and lower-case ASCII letters (a-z, A-Z). 5402 * 5403 * <h5 class='section'>Example:</h5> 5404 * <p class='bjava'> 5405 * randomAlphabetic(<jk>5</jk>); <jc>// "aBcDe"</jc> 5406 * </p> 5407 * 5408 * @param length The length of the generated string. 5409 * @return A new random alphabetic string. 5410 */ 5411 public static String randomAlphabetic(int length) { 5412 if (length < 0) 5413 throw new IllegalArgumentException("Length must be non-negative: " + length); 5414 var sb = new StringBuilder(length); 5415 for (var i = 0; i < length; i++) { 5416 var c = RANDOM.nextInt(52); 5417 if (c < 26) 5418 sb.append((char)('a' + c)); 5419 else 5420 sb.append((char)('A' + c - 26)); 5421 } 5422 return sb.toString(); 5423 } 5424 5425 /** 5426 * Generates a random alphanumeric string of the specified length. 5427 * 5428 * <p> 5429 * Characters are composed of upper-case and lower-case ASCII letters and digits (a-z, A-Z, 0-9). 5430 * 5431 * <h5 class='section'>Example:</h5> 5432 * <p class='bjava'> 5433 * randomAlphanumeric(<jk>8</jk>); <jc>// "aB3dE5fG"</jc> 5434 * </p> 5435 * 5436 * @param length The length of the generated string. 5437 * @return A new random alphanumeric string. 
5438 */ 5439 public static String randomAlphanumeric(int length) { 5440 if (length < 0) 5441 throw new IllegalArgumentException("Length must be non-negative: " + length); 5442 var sb = new StringBuilder(length); 5443 for (var i = 0; i < length; i++) { 5444 var c = RANDOM.nextInt(62); 5445 if (c < 10) 5446 sb.append((char)('0' + c)); 5447 else if (c < 36) 5448 sb.append((char)('a' + c - 10)); 5449 else 5450 sb.append((char)('A' + c - 36)); 5451 } 5452 return sb.toString(); 5453 } 5454 5455 /** 5456 * Generates a random ASCII string of the specified length. 5457 * 5458 * <p> 5459 * Characters are composed of printable ASCII characters (32-126). 5460 * 5461 * <h5 class='section'>Example:</h5> 5462 * <p class='bjava'> 5463 * randomAscii(<jk>10</jk>); <jc>// "!@#$%^&*()"</jc> 5464 * </p> 5465 * 5466 * @param length The length of the generated string. 5467 * @return A new random ASCII string. 5468 */ 5469 public static String randomAscii(int length) { 5470 if (length < 0) 5471 throw new IllegalArgumentException("Length must be non-negative: " + length); 5472 var sb = new StringBuilder(length); 5473 for (var i = 0; i < length; i++) { 5474 sb.append((char)(32 + RANDOM.nextInt(95))); // 95 printable ASCII chars (32-126) 5475 } 5476 return sb.toString(); 5477 } 5478 5479 /** 5480 * Generates a random numeric string of the specified length. 5481 * 5482 * <p> 5483 * Characters are composed of digits (0-9). 5484 * 5485 * <h5 class='section'>Example:</h5> 5486 * <p class='bjava'> 5487 * randomNumeric(<jk>6</jk>); <jc>// "123456"</jc> 5488 * </p> 5489 * 5490 * @param length The length of the generated string. 5491 * @return A new random numeric string. 
5492 */ 5493 public static String randomNumeric(int length) { 5494 if (length < 0) 5495 throw new IllegalArgumentException("Length must be non-negative: " + length); 5496 var sb = new StringBuilder(length); 5497 for (var i = 0; i < length; i++) { 5498 sb.append((char)('0' + RANDOM.nextInt(10))); 5499 } 5500 return sb.toString(); 5501 } 5502 5503 /** 5504 * Generates a random string of the specified length using characters from the given character set. 5505 * 5506 * <p> 5507 * Each character in the generated string is randomly selected from the provided character set. 5508 * 5509 * <h5 class='section'>Example:</h5> 5510 * <p class='bjava'> 5511 * randomString(<jk>5</jk>, <js>"ABC"</js>); <jc>// "BACAB"</jc> 5512 * </p> 5513 * 5514 * @param length The length of the generated string. 5515 * @param chars The character set to use. Must not be null or empty. 5516 * @return A new random string. 5517 * @throws IllegalArgumentException If chars is null or empty, or length is negative. 5518 */ 5519 public static String randomString(int length, String chars) { 5520 if (length < 0) 5521 throw new IllegalArgumentException("Length must be non-negative: " + length); 5522 if (chars == null || chars.isEmpty()) 5523 throw new IllegalArgumentException("Character set must not be null or empty"); 5524 var sb = new StringBuilder(length); 5525 var charsLen = chars.length(); 5526 for (var i = 0; i < length; i++) { 5527 sb.append(chars.charAt(RANDOM.nextInt(charsLen))); 5528 } 5529 return sb.toString(); 5530 } 5531 5532 /** 5533 * Calculates a simple readability score for a string. 5534 * 5535 * <p> 5536 * Uses a simplified Flesch Reading Ease-like formula based on: 5537 * <ul> 5538 * <li>Average words per sentence</li> 5539 * <li>Average syllables per word (estimated)</li> 5540 * </ul> 5541 * Returns a score from 0-100, where higher scores indicate easier reading. 
5542 * 5543 * <h5 class='section'>Example:</h5> 5544 * <p class='bjava'> 5545 * readabilityScore(<js>"The cat sat."</js>); <jc>// Higher score (simple)</jc> 5546 * readabilityScore(<js>"The sophisticated..."</js>); <jc>// Lower score (complex)</jc> 5547 * </p> 5548 * 5549 * @param str The string to analyze. Can be <jk>null</jk>. 5550 * @return A readability score from 0-100, or <c>0.0</c> if the string is <jk>null</jk> or empty. 5551 */ 5552 public static double readabilityScore(String str) { 5553 if (isEmpty(str)) 5554 return 0.0; 5555 5556 var words = extractWords(str); 5557 if (words.isEmpty()) 5558 return 0.0; 5559 5560 // Count sentences (ending with . ! ?) 5561 var sentenceCount = 0; 5562 for (var i = 0; i < str.length(); i++) { 5563 var c = str.charAt(i); 5564 if (c == '.' || c == '!' || c == '?') { 5565 sentenceCount++; 5566 } 5567 } 5568 if (sentenceCount == 0) 5569 sentenceCount = 1; // At least one sentence 5570 5571 // Calculate average words per sentence 5572 var avgWordsPerSentence = (double)words.size() / sentenceCount; 5573 5574 // Estimate average syllables per word (simplified: count vowel groups) 5575 var totalSyllables = 0; 5576 for (var word : words) { 5577 totalSyllables += estimateSyllables(word); 5578 } 5579 var avgSyllablesPerWord = (double)totalSyllables / words.size(); 5580 5581 // Simplified Flesch Reading Ease formula 5582 // Score = 206.835 - (1.015 * ASL) - (84.6 * ASW) 5583 // Where ASL = average sentence length (words), ASW = average syllables per word 5584 var score = 206.835 - (1.015 * avgWordsPerSentence) - (84.6 * avgSyllablesPerWord); 5585 5586 // Clamp to 0-100 range 5587 return Math.max(0.0, Math.min(100.0, score)); 5588 } 5589 5590 /** 5591 * Converts an arbitrary object to a readable string format suitable for debugging and testing. 5592 * 5593 * <p>This method provides intelligent formatting for various Java types, recursively processing 5594 * nested structures to create human-readable representations. 
It's extensively used throughout 5595 * the Juneau framework for test assertions and debugging output.</p> 5596 * 5597 * <h5 class='section'>Type-Specific Formatting:</h5> 5598 * <ul> 5599 * <li><b>null:</b> Returns <js>null</js></li> 5600 * <li><b>Optional:</b> Recursively formats the contained value (or <js>null</js> if empty)</li> 5601 * <li><b>Collections:</b> Formats as <js>"[item1,item2,item3]"</js> with comma-separated elements</li> 5602 * <li><b>Maps:</b> Formats as <js>"{key1=value1,key2=value2}"</js> with comma-separated entries</li> 5603 * <li><b>Map.Entry:</b> Formats as <js>"key=value"</js></li> 5604 * <li><b>Arrays:</b> Converts to list format <js>"[item1,item2,item3]"</js></li> 5605 * <li><b>Iterables/Iterators/Enumerations:</b> Converts to list and formats recursively</li> 5606 * <li><b>GregorianCalendar:</b> Formats as ISO instant timestamp</li> 5607 * <li><b>Date:</b> Formats as ISO instant string (e.g., <js>"2023-12-25T10:30:00Z"</js>)</li> 5608 * <li><b>InputStream:</b> Converts to hexadecimal representation</li> 5609 * <li><b>Reader:</b> Reads content and returns as string</li> 5610 * <li><b>File:</b> Reads file content and returns as string</li> 5611 * <li><b>byte[]:</b> Converts to hexadecimal representation</li> 5612 * <li><b>Enum:</b> Returns the enum name via {@link Enum#name()}</li> 5613 * <li><b>All other types:</b> Uses {@link Object#toString()}</li> 5614 * </ul> 5615 * 5616 * <h5 class='section'>Examples:</h5> 5617 * <p class='bjava'> 5618 * <jc>// Collections</jc> 5619 * readable(List.of("a", "b", "c")) <jc>// Returns: "[a,b,c]"</jc> 5620 * readable(Set.of(1, 2, 3)) <jc>// Returns: "[1,2,3]" (order may vary)</jc> 5621 * 5622 * <jc>// Maps</jc> 5623 * readable(Map.of("foo", "bar", "baz", 123)) <jc>// Returns: "{foo=bar,baz=123}"</jc> 5624 * 5625 * <jc>// Arrays</jc> 5626 * readable(ints(1, 2, 3)) <jc>// Returns: "[1,2,3]"</jc> 5627 * readable(new String[]{"a", "b"}) <jc>// Returns: "[a,b]"</jc> 5628 * 5629 * <jc>// Nested 
structures</jc> 5630 * readable(List.of(Map.of("x", 1), Set.of("a", "b"))) <jc>// Returns: "[{x=1},[a,b]]"</jc> 5631 * 5632 * <jc>// Special types</jc> 5633 * readable(Optional.of("test")) <jc>// Returns: "test"</jc> 5634 * readable(Optional.empty()) <jc>// Returns: null</jc> 5635 * readable(new Date(1640995200000L)) <jc>// Returns: "2022-01-01T00:00:00Z"</jc> 5636 * readable(MyEnum.FOO) <jc>// Returns: "FOO"</jc> 5637 * </p> 5638 * 5639 * <h5 class='section'>Recursive Processing:</h5> 5640 * <p>The method recursively processes nested structures, so complex objects containing 5641 * collections, maps, and arrays are fully flattened into readable format. This makes it 5642 * ideal for test assertions where you need to compare complex object structures.</p> 5643 * 5644 * <h5 class='section'>Error Handling:</h5> 5645 * <p>IO operations (reading files, streams) are wrapped in safe() calls, converting 5646 * any exceptions to RuntimeExceptions. Binary data (InputStreams, byte arrays) is 5647 * converted to hexadecimal representation for readability.</p> 5648 * 5649 * @param o The object to convert to readable format. Can be <jk>null</jk>. 5650 * @return A readable string representation of the object, or <jk>null</jk> if the input was <jk>null</jk>. 
5651 */ 5652 public static String readable(Object o) { 5653 if (o == null) 5654 return null; 5655 var c = o.getClass(); 5656 5657 // Special case for byte[] - must be handled before general array check 5658 if (c == byte[].class) { 5659 return toHex((byte[])o); 5660 } 5661 5662 // Check cache first 5663 var f = READIFIER_CACHE.get(c, () -> { 5664 // Find readifier from READIFIERS list 5665 // First try exact match, then isAssignableFrom 5666 var readifier = READIFIERS.stream() 5667 .filter(r -> r.forClass() == c || r.forClass().isAssignableFrom(c)) 5668 .map(Readifier::toFunction) 5669 .findFirst() 5670 .orElse(null); 5671 5672 // If readifier found, use it 5673 if (readifier != null) 5674 return readifier; 5675 5676 // If no readifier found, check if it's an array 5677 if (c.isArray()) { 5678 return x -> { 5679 var l = list(); 5680 for (var i = 0; i < Array.getLength(x); i++) { 5681 l.add(Array.get(x, i)); 5682 } 5683 return readable(l); 5684 }; 5685 } 5686 5687 // If no readifier found, use toString() as fallback 5688 return x -> x.toString(); 5689 }); 5690 5691 return f.apply(o); 5692 } 5693 5694 /** 5695 * Removes all occurrences of a substring from a string. 5696 * 5697 * <h5 class='section'>Example:</h5> 5698 * <p class='bjava'> 5699 * remove(<jk>null</jk>, <js>"x"</js>); <jc>// null</jc> 5700 * remove(<js>"hello"</js>, <jk>null</jk>); <jc>// "hello"</jc> 5701 * remove(<js>"hello world"</js>, <js>"o"</js>); <jc>// "hell wrld"</jc> 5702 * remove(<js>"hello world"</js>, <js>"xyz"</js>); <jc>// "hello world"</jc> 5703 * </p> 5704 * 5705 * @param str The string to process. 5706 * @param remove The substring to remove. 5707 * @return The string with all occurrences of the substring removed, or <jk>null</jk> if input is <jk>null</jk>. 
5708 */ 5709 public static String remove(String str, String remove) { 5710 if (isEmpty(str) || isEmpty(remove)) 5711 return str; 5712 return str.replace(remove, ""); 5713 } 5714 5715 /** 5716 * Removes diacritical marks (accents) from characters in a string. 5717 * 5718 * <p> 5719 * Normalizes the string to NFD form and removes combining diacritical marks. 5720 * 5721 * <h5 class='section'>Example:</h5> 5722 * <p class='bjava'> 5723 * removeAccents(<js>"café"</js>); <jc>// "cafe"</jc> 5724 * removeAccents(<js>"naïve"</js>); <jc>// "naive"</jc> 5725 * removeAccents(<js>"résumé"</js>); <jc>// "resume"</jc> 5726 * </p> 5727 * 5728 * @param str The string to remove accents from. Can be <jk>null</jk>. 5729 * @return The string with accents removed, or <jk>null</jk> if input is <jk>null</jk>. 5730 */ 5731 public static String removeAccents(String str) { 5732 if (str == null) 5733 return null; 5734 5735 // Normalize to NFD (decomposed form) 5736 var normalized = Normalizer.normalize(str, Normalizer.Form.NFD); 5737 5738 // Remove combining diacritical marks (Unicode category Mn) 5739 var sb = new StringBuilder(normalized.length()); 5740 for (var i = 0; i < normalized.length(); i++) { 5741 var c = normalized.charAt(i); 5742 var type = Character.getType(c); 5743 // Mn = Nonspacing_Mark (combining marks) 5744 if (type != Character.NON_SPACING_MARK) { 5745 sb.append(c); 5746 } 5747 } 5748 5749 return sb.toString(); 5750 } 5751 5752 /** 5753 * Removes multiple substrings from a string. 5754 * 5755 * <h5 class='section'>Example:</h5> 5756 * <p class='bjava'> 5757 * removeAll(<js>"hello world test"</js>, <js>"hello"</js>, <js>"test"</js>); <jc>// " world "</jc> 5758 * removeAll(<jk>null</jk>, <js>"x"</js>); <jc>// null</jc> 5759 * </p> 5760 * 5761 * @param str The string to process. 5762 * @param remove The substrings to remove. 5763 * @return The string with all specified substrings removed, or <jk>null</jk> if input is <jk>null</jk>. 
5764 */ 5765 public static String removeAll(String str, String...remove) { 5766 if (str == null) 5767 return null; 5768 if (isEmpty(str) || remove == null || remove.length == 0) 5769 return str; 5770 var result = str; 5771 for (var r : remove) { 5772 if (r != null) 5773 result = result.replace(r, ""); 5774 } 5775 return result; 5776 } 5777 5778 /** 5779 * Removes control characters from a string, replacing them with spaces. 5780 * 5781 * <h5 class='section'>Example:</h5> 5782 * <p class='bjava'> 5783 * removeControlChars(<js>"hello\u0000\u0001world"</js>); <jc>// "hello world"</jc> 5784 * removeControlChars(<js>"hello\nworld"</js>); <jc>// "hello\nworld"</jc> 5785 * </p> 5786 * 5787 * @param str The string to process. 5788 * @return The string with control characters replaced by spaces (except whitespace control chars), or <jk>null</jk> if input is <jk>null</jk>. 5789 */ 5790 public static String removeControlChars(String str) { 5791 if (str == null) 5792 return null; 5793 var sb = new StringBuilder(); 5794 for (var i = 0; i < str.length(); i++) { 5795 var c = str.charAt(i); 5796 if (Character.isISOControl(c) && ! Character.isWhitespace(c)) 5797 sb.append(' '); 5798 else 5799 sb.append(c); 5800 } 5801 return sb.toString(); 5802 } 5803 5804 /** 5805 * Removes a suffix from a string if present. 5806 * 5807 * <h5 class='section'>Example:</h5> 5808 * <p class='bjava'> 5809 * removeEnd(<jk>null</jk>, <js>"x"</js>); <jc>// null</jc> 5810 * removeEnd(<js>"hello"</js>, <jk>null</jk>); <jc>// "hello"</jc> 5811 * removeEnd(<js>"hello world"</js>, <js>"world"</js>); <jc>// "hello "</jc> 5812 * removeEnd(<js>"hello world"</js>, <js>"xyz"</js>); <jc>// "hello world"</jc> 5813 * </p> 5814 * 5815 * @param str The string to process. 5816 * @param suffix The suffix to remove. 5817 * @return The string with the suffix removed if present, or <jk>null</jk> if input is <jk>null</jk>. 
5818 */ 5819 public static String removeEnd(String str, String suffix) { 5820 if (isEmpty(str) || isEmpty(suffix)) 5821 return str; 5822 if (str.endsWith(suffix)) 5823 return str.substring(0, str.length() - suffix.length()); 5824 return str; 5825 } 5826 5827 /** 5828 * Removes non-printable characters from a string. 5829 * 5830 * <h5 class='section'>Example:</h5> 5831 * <p class='bjava'> 5832 * removeNonPrintable(<js>"hello\u0000world"</js>); <jc>// "helloworld"</jc> 5833 * </p> 5834 * 5835 * @param str The string to process. 5836 * @return The string with non-printable characters removed, or <jk>null</jk> if input is <jk>null</jk>. 5837 */ 5838 public static String removeNonPrintable(String str) { 5839 if (str == null) 5840 return null; 5841 return str.replaceAll("\\p{C}", ""); 5842 } 5843 5844 /** 5845 * Removes a prefix from a string if present. 5846 * 5847 * <h5 class='section'>Example:</h5> 5848 * <p class='bjava'> 5849 * removeStart(<jk>null</jk>, <js>"x"</js>); <jc>// null</jc> 5850 * removeStart(<js>"hello"</js>, <jk>null</jk>); <jc>// "hello"</jc> 5851 * removeStart(<js>"hello world"</js>, <js>"hello"</js>); <jc>// " world"</jc> 5852 * removeStart(<js>"hello world"</js>, <js>"xyz"</js>); <jc>// "hello world"</jc> 5853 * </p> 5854 * 5855 * @param str The string to process. 5856 * @param prefix The prefix to remove. 5857 * @return The string with the prefix removed if present, or <jk>null</jk> if input is <jk>null</jk>. 5858 */ 5859 public static String removeStart(String str, String prefix) { 5860 if (isEmpty(str) || isEmpty(prefix)) 5861 return str; 5862 if (str.startsWith(prefix)) 5863 return str.substring(prefix.length()); 5864 return str; 5865 } 5866 5867 /** 5868 * Removes all underscore characters from a string. 
5869 * 5870 * <p> 5871 * This method is commonly used to process numeric literals that may contain underscores for readability 5872 * (e.g., <js>"1_000_000"</js> becomes <js>"1000000"</js>), as Java allows underscores in numeric literals 5873 * but some parsing methods do not support them. 5874 * 5875 * <p> 5876 * If the string does not contain any underscores, the original string is returned (no new object created). 5877 * 5878 * <h5 class='section'>Example:</h5> 5879 * <p class='bjava'> 5880 * removeUnderscores(<js>"1_000_000"</js>); <jc>// "1000000"</jc> 5881 * removeUnderscores(<js>"1_000.5"</js>); <jc>// "1000.5"</jc> 5882 * removeUnderscores(<js>"hello_world"</js>); <jc>// "helloworld"</jc> 5883 * removeUnderscores(<js>"no_underscores"</js>); <jc>// "nounderscores"</jc> 5884 * removeUnderscores(<js>"Hello"</js>); <jc>// "Hello" (no change, same object returned)</jc> 5885 * </p> 5886 * 5887 * @param value The string from which to remove underscores. Must not be <jk>null</jk>. 5888 * @return A new string with all underscores removed, or the original string if it contains no underscores. 5889 * @throws IllegalArgumentException If <c>value</c> is <jk>null</jk>. 5890 * @see #parseInt(String) 5891 * @see #parseLong(String) 5892 * @see #parseFloat(String) 5893 * @see #parseNumber(String, Class) 5894 */ 5895 public static String removeUnderscores(String value) { 5896 assertArgNotNull("value", value); 5897 return notContains(value, '_') ? value : value.replace("_", ""); 5898 } 5899 5900 /** 5901 * Creates a repeated pattern. 5902 * 5903 * @param count The number of times to repeat the pattern. 5904 * @param pattern The pattern to repeat. 5905 * @return A new string consisting of the repeated pattern. 
5906 */ 5907 public static String repeat(int count, String pattern) { 5908 var sb = new StringBuilder(pattern.length() * count); 5909 for (var i = 0; i < count; i++) 5910 sb.append(pattern); 5911 return sb.toString(); 5912 } 5913 5914 /** 5915 * Replaces <js>"\\uXXXX"</js> character sequences with their unicode characters. 5916 * 5917 * @param s The string to replace unicode sequences in. 5918 * @return A string with unicode sequences replaced. 5919 */ 5920 public static String replaceUnicodeSequences(String s) { 5921 5922 if (s.indexOf('\\') == -1) 5923 return s; 5924 5925 var p = Pattern.compile("\\\\u(\\p{XDigit}{4})"); 5926 var m = p.matcher(s); 5927 var sb = new StringBuffer(s.length()); 5928 5929 while (m.find()) { 5930 var ch = String.valueOf((char)Integer.parseInt(m.group(1), 16)); 5931 m.appendReplacement(sb, Matcher.quoteReplacement(ch)); 5932 } 5933 5934 m.appendTail(sb); 5935 return sb.toString(); 5936 } 5937 5938 /** 5939 * Reverses a string. 5940 * 5941 * <h5 class='section'>Example:</h5> 5942 * <p class='bjava'> 5943 * reverse(<jk>null</jk>); <jc>// null</jc> 5944 * reverse(<js>""</js>); <jc>// ""</jc> 5945 * reverse(<js>"hello"</js>); <jc>// "olleh"</jc> 5946 * </p> 5947 * 5948 * @param str The string to reverse. 5949 * @return The reversed string, or <jk>null</jk> if input is <jk>null</jk>. 5950 */ 5951 public static String reverse(String str) { 5952 if (str == null) 5953 return null; 5954 return new StringBuilder(str).reverse().toString(); 5955 } 5956 5957 /** 5958 * Returns the rightmost characters of a string. 5959 * 5960 * <h5 class='section'>Example:</h5> 5961 * <p class='bjava'> 5962 * right(<jk>null</jk>, 3); <jc>// null</jc> 5963 * right(<js>""</js>, 3); <jc>// ""</jc> 5964 * right(<js>"hello"</js>, 3); <jc>// "llo"</jc> 5965 * right(<js>"hello"</js>, 10); <jc>// "hello"</jc> 5966 * </p> 5967 * 5968 * @param str The string to get characters from. 5969 * @param len The number of characters to get. 
5970 * @return The rightmost characters, or <jk>null</jk> if input is <jk>null</jk>. 5971 */ 5972 public static String right(String str, int len) { 5973 if (str == null) 5974 return null; 5975 if (len < 0) 5976 return ""; 5977 if (len >= str.length()) 5978 return str; 5979 return str.substring(str.length() - len); 5980 } 5981 5982 /** 5983 * Basic HTML/XML sanitization - removes or escapes potentially dangerous content. 5984 * 5985 * <p> 5986 * Removes HTML/XML tags and escapes special characters to prevent XSS attacks. 5987 * This is a basic sanitization - for production use, consider a more robust library. 5988 * 5989 * <h5 class='section'>Example:</h5> 5990 * <p class='bjava'> 5991 * sanitize(<js>"<script>alert('xss')</script>"</js>); <jc>// "&lt;script&gt;alert('xss')&lt;/script&gt;"</jc> 5992 * sanitize(<js>"Hello <b>World</b>"</js>); <jc>// "Hello &lt;b&gt;World&lt;/b&gt;"</jc> 5993 * </p> 5994 * 5995 * @param str The string to sanitize. 5996 * @return The sanitized string with HTML/XML tags escaped, or <jk>null</jk> if input is <jk>null</jk>. 5997 */ 5998 public static String sanitize(String str) { 5999 if (str == null) 6000 return null; 6001 // Escape HTML/XML special characters 6002 return escapeHtml(str); 6003 } 6004 6005 /** 6006 * Calculates the similarity percentage between two strings using Levenshtein distance. 6007 * 6008 * <p> 6009 * Returns a value between 0.0 (completely different) and 1.0 (identical). 6010 * 6011 * <h5 class='section'>Example:</h5> 6012 * <p class='bjava'> 6013 * similarity(<js>"hello"</js>, <js>"hello"</js>); <jc>// 1.0 (100%)</jc> 6014 * similarity(<js>"kitten"</js>, <js>"sitting"</js>); <jc>// ~0.57 (57%)</jc> 6015 * similarity(<js>"abc"</js>, <js>"xyz"</js>); <jc>// 0.0 (0%)</jc> 6016 * </p> 6017 * 6018 * @param str1 The first string. 6019 * @param str2 The second string. 6020 * @return A similarity value between 0.0 and 1.0, where 1.0 means identical. 
6021 */ 6022 public static double similarity(String str1, String str2) { 6023 if (str1 == null) 6024 str1 = ""; 6025 if (str2 == null) 6026 str2 = ""; 6027 6028 if (str1.equals(str2)) 6029 return 1.0; 6030 6031 var maxLen = Math.max(str1.length(), str2.length()); 6032 6033 var distance = levenshteinDistance(str1, str2); 6034 return 1.0 - ((double)distance / maxLen); 6035 } 6036 6037 /** 6038 * Converts a string to snake_case format. 6039 * 6040 * <p> 6041 * Handles various input formats: 6042 * <ul> 6043 * <li>Space-separated: "hello world" → "hello_world"</li> 6044 * <li>CamelCase: "helloWorld" → "hello_world"</li> 6045 * <li>PascalCase: "HelloWorld" → "hello_world"</li> 6046 * <li>Kebab-case: "hello-world" → "hello_world"</li> 6047 * </ul> 6048 * 6049 * <h5 class='section'>Example:</h5> 6050 * <p class='bjava'> 6051 * snakeCase(<jk>null</jk>); <jc>// null</jc> 6052 * snakeCase(<js>""</js>); <jc>// ""</jc> 6053 * snakeCase(<js>"hello world"</js>); <jc>// "hello_world"</jc> 6054 * snakeCase(<js>"helloWorld"</js>); <jc>// "hello_world"</jc> 6055 * snakeCase(<js>"HelloWorld"</js>); <jc>// "hello_world"</jc> 6056 * snakeCase(<js>"hello-world"</js>); <jc>// "hello_world"</jc> 6057 * </p> 6058 * 6059 * @param str The string to convert. 6060 * @return The snake_case string, or <jk>null</jk> if input is <jk>null</jk>. 6061 */ 6062 public static String snakeCase(String str) { 6063 if (str == null) 6064 return null; 6065 if (isEmpty(str)) 6066 return str; 6067 6068 var words = splitWords(str); 6069 if (words.isEmpty()) 6070 return ""; 6071 6072 var result = new StringBuilder(); 6073 for (var i = 0; i < words.size(); i++) { 6074 if (i > 0) 6075 result.append('_'); 6076 result.append(words.get(i).toLowerCase()); 6077 } 6078 6079 return result.toString(); 6080 } 6081 6082 /** 6083 * Sorts a string array in natural order. 6084 * 6085 * <p> 6086 * Returns <jk>null</jk> if the array is <jk>null</jk>. 
6087 * This method creates a copy of the array and sorts it, leaving the original array unchanged. 6088 * 6089 * <h5 class='section'>Examples:</h5> 6090 * <p class='bjava'> 6091 * String[] <jv>array</jv> = {<js>"zebra"</js>, <js>"apple"</js>, <js>"banana"</js>}; 6092 * String[] <jv>sorted</jv> = sort(<jv>array</jv>); 6093 * <jc>// Returns: ["apple", "banana", "zebra"]</jc> 6094 * </p> 6095 * 6096 * @param array The array to sort. Can be <jk>null</jk>. 6097 * @return A new sorted array, or <jk>null</jk> if the array was <jk>null</jk>. 6098 */ 6099 public static String[] sort(String[] array) { 6100 if (array == null) 6101 return null; // NOSONAR - Intentional. 6102 var result = Arrays.copyOf(array, array.length); 6103 Arrays.sort(result); 6104 return result; 6105 } 6106 6107 /** 6108 * Sorts a string array in case-insensitive order. 6109 * 6110 * <p> 6111 * Returns <jk>null</jk> if the array is <jk>null</jk>. 6112 * This method creates a copy of the array and sorts it using case-insensitive comparison, 6113 * leaving the original array unchanged. 6114 * 6115 * <h5 class='section'>Examples:</h5> 6116 * <p class='bjava'> 6117 * String[] <jv>array</jv> = {<js>"Zebra"</js>, <js>"apple"</js>, <js>"Banana"</js>}; 6118 * String[] <jv>sorted</jv> = sortIgnoreCase(<jv>array</jv>); 6119 * <jc>// Returns: ["apple", "Banana", "Zebra"]</jc> 6120 * </p> 6121 * 6122 * @param array The array to sort. Can be <jk>null</jk>. 6123 * @return A new sorted array (case-insensitive), or <jk>null</jk> if the array was <jk>null</jk>. 6124 */ 6125 public static String[] sortIgnoreCase(String[] array) { 6126 if (array == null) 6127 return null; // NOSONAR - Intentional. 6128 var result = Arrays.copyOf(array, array.length); 6129 Arrays.sort(result, String.CASE_INSENSITIVE_ORDER); 6130 return result; 6131 } 6132 6133 /** 6134 * Generates a Soundex code for a string. 6135 * 6136 * <p> 6137 * Soundex is a phonetic algorithm for indexing names by sound. 
The code consists of 6138 * a letter followed by three digits. Similar-sounding names produce the same code. 6139 * 6140 * <h5 class='section'>Example:</h5> 6141 * <p class='bjava'> 6142 * soundex(<js>"Smith"</js>); <jc>// "S530"</jc> 6143 * soundex(<js>"Smythe"</js>); <jc>// "S530"</jc> 6144 * soundex(<js>"Robert"</js>); <jc>// "R163"</jc> 6145 * </p> 6146 * 6147 * @param str The string to generate a Soundex code for. Can be <jk>null</jk>. 6148 * @return The Soundex code (1 letter + 3 digits), or <jk>null</jk> if input is <jk>null</jk> or empty. 6149 */ 6150 public static String soundex(String str) { 6151 if (isEmpty(str)) 6152 return null; 6153 6154 var upper = str.toUpperCase(); 6155 var result = new StringBuilder(4); 6156 result.append(upper.charAt(0)); 6157 6158 // Soundex mapping: 0 = AEIOUHWY, 1 = BFPV, 2 = CGJKQSXZ, 3 = DT, 4 = L, 5 = MN, 6 = R 6159 // H/W/Y don't get codes but don't break sequences either 6160 // Initialize lastCode to a value that won't match any real code 6161 var lastCode = '\0'; 6162 6163 for (var i = 1; i < upper.length() && result.length() < 4; i++) { 6164 var c = upper.charAt(i); 6165 var code = getSoundexCode(c); 6166 if (code == '0') { 6167 // H/W/Y/vowels - don't add code, but don't update lastCode either 6168 // This allows sequences to continue across H/W/Y/vowels 6169 continue; 6170 } 6171 if (code != lastCode) { 6172 result.append(code); 6173 lastCode = code; 6174 } 6175 // If code == lastCode, skip it (consecutive same codes) 6176 } 6177 6178 // Pad with zeros if needed 6179 while (result.length() < 4) { 6180 result.append('0'); 6181 } 6182 6183 return result.toString(); 6184 } 6185 6186 /** 6187 * Splits a comma-delimited list into a list of strings. 6188 * 6189 * @param s The string to split. 6190 * @return A list of split strings, or an empty list if the input is <jk>null</jk>. 6191 */ 6192 public static List<String> split(String s) { 6193 return s == null ? 
Collections.emptyList() : split(s, ','); 6194 } 6195 6196 /** 6197 * Splits a character-delimited string into a string array. 6198 * 6199 * <p> 6200 * Does not split on escaped-delimiters (e.g. "\,"); 6201 * Resulting tokens are trimmed of whitespace. 6202 * 6203 * <p> 6204 * <b>NOTE:</b> This behavior is different than the Jakarta equivalent. 6205 * split("a,b,c",',') -> {"a","b","c"} 6206 * split("a, b ,c ",',') -> {"a","b","c"} 6207 * split("a,,c",',') -> {"a","","c"} 6208 * split(",,",',') -> {"","",""} 6209 * split("",',') -> {} 6210 * split(null,',') -> null 6211 * split("a,b\,c,d", ',', false) -> {"a","b\,c","d"} 6212 * split("a,b\\,c,d", ',', false) -> {"a","b\","c","d"} 6213 * split("a,b\,c,d", ',', true) -> {"a","b,c","d"} 6214 * 6215 * @param s The string to split. Can be <jk>null</jk>. 6216 * @param c The character to split on. 6217 * @return The tokens, or <jk>null</jk> if the string was null. 6218 */ 6219 public static List<String> split(String s, char c) { 6220 return split(s, c, Integer.MAX_VALUE); 6221 } 6222 6223 /** 6224 * Same as {@link splita} but consumes the tokens instead of creating an array. 6225 * 6226 * @param s The string to split. 6227 * @param c The character to split on. 6228 * @param consumer The consumer of the tokens. 6229 */ 6230 public static void split(String s, char c, Consumer<String> consumer) { 6231 var escapeChars = getEscapeSet(c); 6232 6233 if (isEmpty(s)) 6234 return; 6235 if (s.indexOf(c) == -1) { 6236 consumer.accept(s); 6237 return; 6238 } 6239 6240 var x1 = 0; 6241 var escapeCount = 0; 6242 6243 for (var i = 0; i < s.length(); i++) { 6244 if (s.charAt(i) == '\\') 6245 escapeCount++; 6246 else if (s.charAt(i) == c && escapeCount % 2 == 0) { 6247 var s2 = s.substring(x1, i); 6248 var s3 = unescapeChars(s2, escapeChars); 6249 consumer.accept(s3.trim()); // NOSONAR - NPE not possible. 
6250 x1 = i + 1; 6251 } 6252 if (s.charAt(i) != '\\') 6253 escapeCount = 0; 6254 } 6255 var s2 = s.substring(x1); 6256 var s3 = unescapeChars(s2, escapeChars); 6257 consumer.accept(s3.trim()); // NOSONAR - NPE not possible. 6258 } 6259 6260 /** 6261 * Same as {@link splita} but limits the number of tokens returned. 6262 * 6263 * @param s The string to split. Can be <jk>null</jk>. 6264 * @param c The character to split on. 6265 * @param limit The maximum number of tokens to return. 6266 * @return The tokens, or <jk>null</jk> if the string was null. 6267 */ 6268 public static List<String> split(String s, char c, int limit) { 6269 6270 var escapeChars = getEscapeSet(c); 6271 6272 if (s == null) 6273 return null; // NOSONAR - Intentional. 6274 if (isEmpty(s)) 6275 return Collections.emptyList(); 6276 if (s.indexOf(c) == -1) 6277 return Collections.singletonList(s); 6278 6279 var l = new LinkedList<String>(); 6280 var sArray = s.toCharArray(); 6281 var x1 = 0; 6282 var escapeCount = 0; 6283 limit--; 6284 for (var i = 0; i < sArray.length && limit > 0; i++) { 6285 if (sArray[i] == '\\') 6286 escapeCount++; 6287 else if (sArray[i] == c && escapeCount % 2 == 0) { 6288 var s2 = new String(sArray, x1, i - x1); 6289 var s3 = unescapeChars(s2, escapeChars); 6290 l.add(s3.trim()); 6291 limit--; 6292 x1 = i + 1; 6293 } 6294 if (sArray[i] != '\\') 6295 escapeCount = 0; 6296 } 6297 var s2 = new String(sArray, x1, sArray.length - x1); 6298 var s3 = unescapeChars(s2, escapeChars); 6299 l.add(s3.trim()); 6300 6301 return l; 6302 } 6303 6304 /** 6305 * Same as {@link splita} but consumes the tokens instead of creating an array. 6306 * 6307 * @param s The string to split. 6308 * @param consumer The consumer of the tokens. 6309 */ 6310 public static void split(String s, Consumer<String> consumer) { 6311 StringUtils.split(s, ',', consumer); 6312 } 6313 6314 /** 6315 * Splits a comma-delimited list into an array of strings. 6316 * 6317 * @param s The string to split. 
6318 * @return An array of split strings. 6319 */ 6320 public static String[] splita(String s) { 6321 return splita(s, ','); 6322 } 6323 6324 /** 6325 * Splits a character-delimited string into a string array. 6326 * 6327 * <p> 6328 * Does not split on escaped-delimiters (e.g. "\,"); 6329 * Resulting tokens are trimmed of whitespace. 6330 * 6331 * <p> 6332 * <b>NOTE:</b> This behavior is different than the Jakarta equivalent. 6333 * split("a,b,c",',') -> {"a","b","c"} 6334 * split("a, b ,c ",',') -> {"a","b","c"} 6335 * split("a,,c",',') -> {"a","","c"} 6336 * split(",,",',') -> {"","",""} 6337 * split("",',') -> {} 6338 * split(null,',') -> null 6339 * split("a,b\,c,d", ',', false) -> {"a","b\,c","d"} 6340 * split("a,b\\,c,d", ',', false) -> {"a","b\","c","d"} 6341 * split("a,b\,c,d", ',', true) -> {"a","b,c","d"} 6342 * 6343 * @param s The string to split. Can be <jk>null</jk>. 6344 * @param c The character to split on. 6345 * @return The tokens, or <jk>null</jk> if the string was null. 6346 */ 6347 public static String[] splita(String s, char c) { 6348 return splita(s, c, Integer.MAX_VALUE); 6349 } 6350 6351 /** 6352 * Same as {@link #splita(String, char)} but limits the number of tokens returned. 6353 * 6354 * @param s The string to split. Can be <jk>null</jk>. 6355 * @param c The character to split on. 6356 * @param limit The maximum number of tokens to return. 6357 * @return The tokens, or <jk>null</jk> if the string was null. 6358 */ 6359 public static String[] splita(String s, char c, int limit) { 6360 var l = StringUtils.split(s, c, limit); 6361 return l == null ? null : l.toArray(new String[l.size()]); 6362 } 6363 6364 /** 6365 * Same as {@link #splita(String, char)} except splits all strings in the input and returns a single result. 6366 * 6367 * @param s The string to split. Can be <jk>null</jk>. 6368 * @param c The character to split on. 
6369 * @return The tokens, or null if the input array was null 6370 */ 6371 public static String[] splita(String[] s, char c) { 6372 if (s == null) 6373 return null; // NOSONAR - Intentional. 6374 var l = new LinkedList<String>(); 6375 for (var ss : s) { 6376 if (ss == null || ss.indexOf(c) == -1) 6377 l.add(ss); 6378 else 6379 Collections.addAll(l, splita(ss, c)); 6380 } 6381 return l.toArray(new String[l.size()]); 6382 } 6383 6384 /** 6385 * Splits a list of key-value pairs into an ordered map. 6386 * 6387 * <p> 6388 * Example: 6389 * <p class='bjava'> 6390 * String <jv>in</jv> = <js>"foo=1;bar=2"</js>; 6391 * Map <jv>map</jv> = StringUtils.<jsm>splitMap</jsm>(in, <js>';'</js>, <js>'='</js>, <jk>true</jk>); 6392 * </p> 6393 * 6394 * @param s The string to split. 6395 * @param trim Trim strings after parsing. 6396 * @return The parsed map, or null if the string was null. 6397 */ 6398 public static Map<String,String> splitMap(String s, boolean trim) { 6399 6400 if (s == null) 6401 return null; // NOSONAR - Intentional. 6402 if (isEmpty(s)) 6403 return mape(); 6404 6405 var m = new LinkedHashMap<String,String>(); 6406 6407 // S1: Found start of key, looking for equals. 6408 // S2: Found equals, looking for delimiter (or end). 6409 6410 var state = S1; 6411 6412 var sArray = s.toCharArray(); 6413 var x1 = 0; 6414 var escapeCount = 0; 6415 var key = (String)null; 6416 for (var i = 0; i < sArray.length + 1; i++) { 6417 var c = i == sArray.length ? 
',' : sArray[i]; 6418 if (c == '\\') 6419 escapeCount++; 6420 if (escapeCount % 2 == 0) { 6421 if (state == S1) { 6422 if (c == '=') { 6423 key = s.substring(x1, i); 6424 if (trim) 6425 key = trim(key); 6426 key = unescapeChars(key, MAP_ESCAPE_SET); 6427 state = S2; 6428 x1 = i + 1; 6429 } else if (c == ',') { 6430 key = s.substring(x1, i); 6431 if (trim) 6432 key = trim(key); 6433 key = unescapeChars(key, MAP_ESCAPE_SET); 6434 m.put(key, ""); 6435 state = S1; 6436 x1 = i + 1; 6437 } 6438 } else /* state == S2 */ { 6439 if (c == ',') { // NOSONAR - Intentional. 6440 var val = s.substring(x1, i); 6441 if (trim) 6442 val = trim(val); 6443 val = unescapeChars(val, MAP_ESCAPE_SET); 6444 m.put(key, val); 6445 key = null; 6446 x1 = i + 1; 6447 state = S1; 6448 } 6449 } 6450 } 6451 if (c != '\\') 6452 escapeCount = 0; 6453 } 6454 6455 return m; 6456 } 6457 6458 /** 6459 * Splits the method arguments in the signature of a method. 6460 * 6461 * @param s The arguments to split. 6462 * @return The split arguments, or null if the input string is null. 6463 */ 6464 public static String[] splitMethodArgs(String s) { 6465 6466 if (s == null) 6467 return null; // NOSONAR - Intentional. 6468 if (isEmpty(s)) 6469 return new String[0]; 6470 if (s.indexOf(',') == -1) 6471 return a(s); 6472 6473 var l = new LinkedList<String>(); 6474 var sArray = s.toCharArray(); 6475 var x1 = 0; 6476 var paramDepth = 0; 6477 6478 for (var i = 0; i < sArray.length; i++) { 6479 var c = s.charAt(i); 6480 if (c == '>') 6481 paramDepth++; 6482 else if (c == '<') 6483 paramDepth--; 6484 else if (c == ',' && paramDepth == 0) { 6485 var s2 = new String(sArray, x1, i - x1); 6486 l.add(s2.trim()); 6487 x1 = i + 1; 6488 } 6489 } 6490 6491 var s2 = new String(sArray, x1, sArray.length - x1); 6492 l.add(s2.trim()); 6493 6494 return l.toArray(new String[l.size()]); 6495 } 6496 6497 /** 6498 * Splits a comma-delimited list containing "nesting constructs". 
6499 * 6500 * Nesting constructs are simple embedded "{...}" comma-delimted lists. 6501 * 6502 * Example: 6503 * "a{b,c},d" -> ["a{b,c}","d"] 6504 * 6505 * Handles escapes and trims whitespace from tokens. 6506 * 6507 * @param s The input string. 6508 * @return 6509 * The results, or <jk>null</jk> if the input was <jk>null</jk>. 6510 * <br>An empty string results in an empty array. 6511 */ 6512 public static List<String> splitNested(String s) { 6513 var escapeChars = getEscapeSet(','); 6514 6515 if (s == null) 6516 return null; // NOSONAR - Intentional. 6517 if (isEmpty(s)) 6518 return Collections.emptyList(); 6519 if (s.indexOf(',') == -1) 6520 return Collections.singletonList(trim(s)); 6521 6522 var l = new LinkedList<String>(); 6523 6524 var x1 = 0; 6525 var inEscape = false; 6526 var depthCount = 0; 6527 6528 for (var i = 0; i < s.length(); i++) { 6529 var c = s.charAt(i); 6530 if (inEscape) { 6531 if (c == '\\') { 6532 inEscape = false; 6533 } 6534 } else { 6535 if (c == '\\') { 6536 inEscape = true; 6537 } else if (c == '{') { 6538 depthCount++; 6539 } else if (c == '}') { 6540 depthCount--; 6541 } else if (c == ',' && depthCount == 0) { 6542 l.add(trim(unescapeChars(s.substring(x1, i), escapeChars))); 6543 x1 = i + 1; 6544 } 6545 } 6546 } 6547 l.add(trim(unescapeChars(s.substring(x1, s.length()), escapeChars))); 6548 6549 return l; 6550 } 6551 6552 /** 6553 * Splits a nested comma-delimited list. 6554 * 6555 * Nesting constructs are simple embedded "{...}" comma-delimted lists. 6556 * 6557 * Example: 6558 * "a{b,c{d,e}}" -> ["b","c{d,e}"] 6559 * 6560 * Handles escapes and trims whitespace from tokens. 6561 * 6562 * @param s The input string. 6563 * @return 6564 * The results, or <jk>null</jk> if the input was <jk>null</jk>. 6565 * <br>An empty string results in an empty array. 
6566 */ 6567 public static List<String> splitNestedInner(String s) { 6568 assertArg(nn(s), "String was null."); 6569 assertArg(ne(s), "String was empty."); 6570 6571 // S1: Looking for '{' 6572 // S2: Found '{', looking for '}' 6573 6574 var start = -1; 6575 var end = -1; 6576 var state = S1; 6577 6578 var depth = 0; 6579 var inEscape = false; 6580 6581 for (var i = 0; i < s.length(); i++) { 6582 var c = s.charAt(i); 6583 if (inEscape) { 6584 if (c == '\\') { 6585 inEscape = false; 6586 } 6587 } else { 6588 if (c == '\\') { 6589 inEscape = true; 6590 } else if (state == S1) { 6591 if (c == '{') { 6592 start = i + 1; 6593 state = S2; 6594 } 6595 } else /* state == S2 */ { 6596 if (c == '{') { 6597 depth++; 6598 } else if (depth > 0 && c == '}') { 6599 depth--; 6600 } else if (c == '}') { 6601 end = i; 6602 break; 6603 } 6604 } 6605 } 6606 } 6607 6608 if (start == -1) 6609 throw illegalArg("Start character '{' not found in string: {0}", s); 6610 if (end == -1) 6611 throw illegalArg("End character '}' not found in string {0}", s); 6612 return splitNested(s.substring(start, end)); 6613 } 6614 6615 /** 6616 * Splits a space-delimited string with optionally quoted arguments. 6617 * 6618 * <p> 6619 * Examples: 6620 * <ul> 6621 * <li><js>"foo"</js> => <c>["foo"]</c> 6622 * <li><js>" foo "</js> => <c>["foo"]</c> 6623 * <li><js>"foo bar baz"</js> => <c>["foo","bar","baz"]</c> 6624 * <li><js>"foo 'bar baz'"</js> => <c>["foo","bar baz"]</c> 6625 * <li><js>"foo \"bar baz\""</js> => <c>["foo","bar baz"]</c> 6626 * <li><js>"foo 'bar\'baz'"</js> => <c>["foo","bar'baz"]</c> 6627 * </ul> 6628 * 6629 * @param s The input string. 6630 * @return 6631 * The results, or <jk>null</jk> if the input was <jk>null</jk>. 6632 * <br>An empty string results in an empty array. 
/**
 * Splits a space-delimited string with optionally quoted arguments.
 *
 * <p>
 * Convenience overload that delegates to {@link #splitQuoted(String, boolean)} with
 * <c>keepQuotes</c> set to <jk>false</jk>, so surrounding quote characters are not kept on the tokens.
 *
 * <p>
 * Examples:
 * <ul>
 * 	<li><js>"foo"</js> =&gt; <c>["foo"]</c>
 * 	<li><js>" foo "</js> =&gt; <c>["foo"]</c>
 * 	<li><js>"foo bar baz"</js> =&gt; <c>["foo","bar","baz"]</c>
 * 	<li><js>"foo 'bar baz'"</js> =&gt; <c>["foo","bar baz"]</c>
 * 	<li><js>"foo \"bar baz\""</js> =&gt; <c>["foo","bar baz"]</c>
 * 	<li><js>"foo 'bar\'baz'"</js> =&gt; <c>["foo","bar'baz"]</c>
 * </ul>
 *
 * @param s The input string.
 * @return
 * 	The results, or <jk>null</jk> if the input was <jk>null</jk>.
 * 	<br>An empty string results in an empty array.
 */
public static String[] splitQuoted(String s) {
    return splitQuoted(s, false);
}
6694 s2 = unescapeChars(s2, QUOTE_ESCAPE_SET); 6695 l.add(s2); 6696 state = S1; 6697 isInEscape = needsUnescape = false; 6698 } 6699 } else { 6700 isInEscape = false; 6701 } 6702 } else /* state == S4 */ { 6703 if (c == ' ' || c == '\t') { 6704 l.add(s.substring(mark, i)); 6705 state = S1; 6706 } 6707 } 6708 } 6709 if (state == S4) 6710 l.add(s.substring(mark)); 6711 else if (state == S2 || state == S3) 6712 throw illegalArg("Unmatched string quotes: {0}", s); 6713 return l.toArray(new String[l.size()]); 6714 } 6715 6716 /** 6717 * An efficient method for checking if a string starts with a character. 6718 * 6719 * @param s The string to check. Can be <jk>null</jk>. 6720 * @param c The character to check for. 6721 * @return <jk>true</jk> if the specified string is not <jk>null</jk> and starts with the specified character. 6722 */ 6723 public static boolean startsWith(String s, char c) { 6724 if (nn(s)) { 6725 var i = s.length(); 6726 if (i > 0) 6727 return s.charAt(0) == c; 6728 } 6729 return false; 6730 } 6731 6732 /** 6733 * Checks if a string starts with a prefix, ignoring case. 6734 * 6735 * <h5 class='section'>Example:</h5> 6736 * <p class='bjava'> 6737 * startsWithIgnoreCase(<js>"Hello World"</js>, <js>"hello"</js>); <jc>// true</jc> 6738 * startsWithIgnoreCase(<js>"Hello World"</js>, <js>"HELLO"</js>); <jc>// true</jc> 6739 * startsWithIgnoreCase(<js>"hello world"</js>, <js>"world"</js>); <jc>// false</jc> 6740 * </p> 6741 * 6742 * @param str The string to check. 6743 * @param prefix The prefix to check for. 6744 * @return <jk>true</jk> if the string starts with the prefix (ignoring case), <jk>false</jk> otherwise. 6745 */ 6746 public static boolean startsWithIgnoreCase(String str, String prefix) { 6747 if (str == null || prefix == null) 6748 return false; 6749 return str.toLowerCase().startsWith(prefix.toLowerCase()); 6750 } 6751 6752 /** 6753 * Takes a supplier of any type and returns a {@link Supplier}{@code <String>}. 
6754 * 6755 * <p>Useful when passing arguments to loggers. 6756 * 6757 * @param s The supplier. 6758 * @return A string supplier that calls {@link #readable(Object)} on the supplied value. 6759 */ 6760 public static Supplier<String> stringSupplier(Supplier<?> s) { 6761 return () -> readable(s.get()); 6762 } 6763 6764 /** 6765 * Strips the first and last character from a string. 6766 * 6767 * @param s The string to strip. 6768 * @return The striped string, or the same string if the input was <jk>null</jk> or less than length 2. 6769 */ 6770 public static String strip(String s) { 6771 if (s == null || s.length() <= 1) 6772 return s; 6773 return s.substring(1, s.length() - 1); 6774 } 6775 6776 /** 6777 * Strips invalid characters such as CTRL characters from a string meant to be encoded 6778 * as an HTTP header value. 6779 * 6780 * @param s The string to strip chars from. 6781 * @return The string with invalid characters removed. 6782 */ 6783 public static String stripInvalidHttpHeaderChars(String s) { 6784 6785 if (s == null) 6786 return null; 6787 6788 var needsReplace = false; 6789 for (var i = 0; i < s.length() && ! needsReplace; i++) 6790 needsReplace |= HTTP_HEADER_CHARS.contains(s.charAt(i)); 6791 6792 if (! needsReplace) 6793 return s; 6794 6795 var sb = new StringBuilder(s.length()); 6796 for (var i = 0; i < s.length(); i++) { 6797 var c = s.charAt(i); 6798 if (HTTP_HEADER_CHARS.contains(c)) 6799 sb.append(c); 6800 } 6801 6802 return sb.toString(); 6803 } 6804 6805 /** 6806 * Returns the substring after the first occurrence of a separator. 
6807 * 6808 * <h5 class='section'>Example:</h5> 6809 * <p class='bjava'> 6810 * substringAfter(<jk>null</jk>, <js>"."</js>); <jc>// null</jc> 6811 * substringAfter(<js>"hello.world"</js>, <jk>null</jk>); <jc>// ""</jc> 6812 * substringAfter(<js>"hello.world"</js>, <js>"."</js>); <jc>// "world"</jc> 6813 * substringAfter(<js>"hello.world"</js>, <js>"xyz"</js>); <jc>// ""</jc> 6814 * </p> 6815 * 6816 * @param str The string to get a substring from. 6817 * @param separator The separator string. 6818 * @return The substring after the first occurrence of the separator, or empty string if separator not found. 6819 */ 6820 public static String substringAfter(String str, String separator) { 6821 if (isEmpty(str)) 6822 return str; 6823 if (separator == null) 6824 return ""; 6825 var pos = str.indexOf(separator); 6826 if (pos == -1) 6827 return ""; 6828 return str.substring(pos + separator.length()); 6829 } 6830 6831 /** 6832 * Returns the substring before the first occurrence of a separator. 6833 * 6834 * <h5 class='section'>Example:</h5> 6835 * <p class='bjava'> 6836 * substringBefore(<jk>null</jk>, <js>"."</js>); <jc>// null</jc> 6837 * substringBefore(<js>"hello.world"</js>, <jk>null</jk>); <jc>// "hello.world"</jc> 6838 * substringBefore(<js>"hello.world"</js>, <js>"."</js>); <jc>// "hello"</jc> 6839 * substringBefore(<js>"hello.world"</js>, <js>"xyz"</js>); <jc>// "hello.world"</jc> 6840 * </p> 6841 * 6842 * @param str The string to get a substring from. 6843 * @param separator The separator string. 6844 * @return The substring before the first occurrence of the separator, or the original string if separator not found. 6845 */ 6846 public static String substringBefore(String str, String separator) { 6847 if (isEmpty(str) || separator == null) 6848 return str; 6849 var pos = str.indexOf(separator); 6850 if (pos == -1) 6851 return str; 6852 return str.substring(0, pos); 6853 } 6854 6855 /** 6856 * Returns the substring between two delimiters. 
6857 * 6858 * <h5 class='section'>Example:</h5> 6859 * <p class='bjava'> 6860 * substringBetween(<jk>null</jk>, <js>"<"</js>, <js>">"</js>); <jc>// null</jc> 6861 * substringBetween(<js>"<hello>"</js>, <js>"<"</js>, <js>">"</js>); <jc>// "hello"</jc> 6862 * substringBetween(<js>"<hello>"</js>, <js>"["</js>, <js>"]"</js>); <jc>// null</jc> 6863 * </p> 6864 * 6865 * @param str The string to get a substring from. 6866 * @param open The opening delimiter. 6867 * @param close The closing delimiter. 6868 * @return The substring between the delimiters, or <jk>null</jk> if delimiters not found. 6869 */ 6870 public static String substringBetween(String str, String open, String close) { 6871 if (str == null || open == null || close == null) 6872 return null; 6873 var start = str.indexOf(open); 6874 if (start == -1) 6875 return null; 6876 var end = str.indexOf(close, start + open.length()); 6877 if (end == -1) 6878 return null; 6879 return str.substring(start + open.length(), end); 6880 } 6881 6882 /** 6883 * Swaps the case of all characters in a string. 6884 * 6885 * <h5 class='section'>Example:</h5> 6886 * <p class='bjava'> 6887 * swapCase(<js>"Hello World"</js>); <jc>// "hELLO wORLD"</jc> 6888 * swapCase(<js>"ABC123xyz"</js>); <jc>// "abc123XYZ"</jc> 6889 * </p> 6890 * 6891 * @param str The string to process. 6892 * @return The string with case swapped, or <jk>null</jk> if input is <jk>null</jk>. 6893 */ 6894 public static String swapCase(String str) { 6895 if (str == null) 6896 return null; 6897 var sb = new StringBuilder(str.length()); 6898 for (var i = 0; i < str.length(); i++) { 6899 var c = str.charAt(i); 6900 if (LETTER_UC.contains(c)) 6901 sb.append(Character.toLowerCase(c)); 6902 else if (LETTER_LC.contains(c)) 6903 sb.append(Character.toUpperCase(c)); 6904 else 6905 sb.append(c); 6906 } 6907 return sb.toString(); 6908 } 6909 6910 /** 6911 * Converts a string to Title Case format (first letter of each word capitalized, separated by spaces). 
6912 * 6913 * <p> 6914 * Handles various input formats: 6915 * <ul> 6916 * <li>CamelCase: "helloWorld" → "Hello World"</li> 6917 * <li>PascalCase: "HelloWorld" → "Hello World"</li> 6918 * <li>Snake_case: "hello_world" → "Hello World"</li> 6919 * <li>Kebab-case: "hello-world" → "Hello World"</li> 6920 * </ul> 6921 * 6922 * <h5 class='section'>Example:</h5> 6923 * <p class='bjava'> 6924 * titleCase(<jk>null</jk>); <jc>// null</jc> 6925 * titleCase(<js>""</js>); <jc>// ""</jc> 6926 * titleCase(<js>"hello world"</js>); <jc>// "Hello World"</jc> 6927 * titleCase(<js>"helloWorld"</js>); <jc>// "Hello World"</jc> 6928 * titleCase(<js>"hello_world"</js>); <jc>// "Hello World"</jc> 6929 * titleCase(<js>"hello-world"</js>); <jc>// "Hello World"</jc> 6930 * </p> 6931 * 6932 * @param str The string to convert. 6933 * @return The Title Case string, or <jk>null</jk> if input is <jk>null</jk>. 6934 */ 6935 public static String titleCase(String str) { 6936 if (str == null) 6937 return null; 6938 if (isEmpty(str)) 6939 return str; 6940 6941 var words = splitWords(str); 6942 if (words.isEmpty()) 6943 return ""; 6944 6945 var result = new StringBuilder(); 6946 for (var i = 0; i < words.size(); i++) { 6947 if (i > 0) 6948 result.append(' '); 6949 result.append(capitalize(words.get(i).toLowerCase())); 6950 } 6951 6952 return result.toString(); 6953 } 6954 6955 /** 6956 * Converts the specified object to a comma-delimited list. 6957 * 6958 * @param o The object to convert. 6959 * @return The specified object as a comma-delimited list. 
6960 */ 6961 public static String toCdl(Object o) { 6962 if (o == null) 6963 return null; 6964 if (isArray(o)) { 6965 var sb = new StringBuilder(); 6966 for (int i = 0, j = Array.getLength(o); i < j; i++) { 6967 if (i > 0) 6968 sb.append(", "); 6969 sb.append(Array.get(o, i)); 6970 } 6971 return sb.toString(); 6972 } 6973 if (o instanceof Collection<?> c) 6974 return join(c, ", "); 6975 return o.toString(); 6976 } 6977 6978 /** 6979 * Converts the specified byte into a 2 hexadecimal characters. 6980 * 6981 * @param b The number to convert to hex. 6982 * @return A <code><jk>char</jk>[2]</code> containing the specified characters. 6983 */ 6984 public static String toHex(byte b) { 6985 var c = new char[2]; 6986 var v = b & 0xFF; 6987 c[0] = HEX[v >>> 4]; 6988 c[1] = HEX[v & 0x0F]; 6989 return new String(c); 6990 } 6991 6992 /** 6993 * Converts a byte array into a simple hexadecimal character string. 6994 * 6995 * @param bytes The bytes to convert to hexadecimal. 6996 * @return A new string consisting of hexadecimal characters. 6997 */ 6998 public static String toHex(byte[] bytes) { 6999 var sb = new StringBuilder(bytes.length * 2); 7000 for (var element : bytes) { 7001 var v = element & 0xFF; 7002 sb.append(HEX[v >>> 4]).append(HEX[v & 0x0F]); 7003 } 7004 return sb.toString(); 7005 } 7006 7007 /** 7008 * Converts the contents of the specified input stream to a hex string. 7009 * 7010 * @param is The input stream to convert. 7011 * @return The hex string representation of the input stream contents, or <jk>null</jk> if the stream is <jk>null</jk>. 7012 */ 7013 public static String toHex(InputStream is) { 7014 return safe(() -> is == null ? null : toHex(readBytes(is))); 7015 } 7016 7017 /** 7018 * Converts the specified number into a 2 hexadecimal characters. 7019 * 7020 * @param num The number to convert to hex. 7021 * @return A <code><jk>char</jk>[2]</code> containing the specified characters. 
7022 */ 7023 public static char[] toHex2(int num) { 7024 if (num < 0 || num > 255) 7025 throw new NumberFormatException("toHex2 can only be used on numbers between 0 and 255"); 7026 var n = new char[2]; 7027 var a = num % 16; 7028 n[1] = (char)(a > 9 ? 'A' + a - 10 : '0' + a); 7029 a = (num / 16) % 16; 7030 n[0] = (char)(a > 9 ? 'A' + a - 10 : '0' + a); 7031 return n; 7032 } 7033 7034 /** 7035 * Converts the specified number into a 4 hexadecimal characters. 7036 * 7037 * @param num The number to convert to hex. 7038 * @return A <code><jk>char</jk>[4]</code> containing the specified characters. 7039 * @throws NumberFormatException If the number is negative. 7040 */ 7041 public static char[] toHex4(int num) { 7042 if (num < 0) 7043 throw new NumberFormatException("toHex4 can only be used on non-negative numbers"); 7044 var n = new char[4]; 7045 var a = num % 16; 7046 n[3] = (char)(a > 9 ? 'A' + a - 10 : '0' + a); 7047 var base = 16; 7048 for (var i = 1; i < 4; i++) { 7049 a = (num / base) % 16; 7050 base <<= 4; 7051 n[3 - i] = (char)(a > 9 ? 'A' + a - 10 : '0' + a); 7052 } 7053 return n; 7054 } 7055 7056 /** 7057 * Converts the specified number into a 8 hexadecimal characters. 7058 * 7059 * @param num The number to convert to hex. 7060 * @return A <code><jk>char</jk>[8]</code> containing the specified characters. 7061 * @throws NumberFormatException If the number is negative. 7062 */ 7063 public static char[] toHex8(long num) { 7064 if (num < 0) 7065 throw new NumberFormatException("toHex8 can only be used on non-negative numbers"); 7066 var n = new char[8]; 7067 var a = num % 16; 7068 n[7] = (char)(a > 9 ? 'A' + a - 10 : '0' + a); 7069 var base = 16; 7070 for (var i = 1; i < 8; i++) { 7071 a = (num / base) % 16; 7072 base <<= 4; 7073 n[7 - i] = (char)(a > 9 ? 'A' + a - 10 : '0' + a); 7074 } 7075 return n; 7076 } 7077 7078 /** 7079 * Converts the specified object to an ISO8601 date string. 7080 * 7081 * @param c The object to convert. 
7082 * @return The converted object. 7083 */ 7084 public static String toIsoDate(Calendar c) { 7085 if (c == null) { 7086 return null; 7087 } 7088 // Convert Calendar to ZonedDateTime and format as ISO8601 date (YYYY-MM-DD) 7089 ZonedDateTime zdt = c.toInstant().atZone(c.getTimeZone().toZoneId()); 7090 return zdt.format(DateTimeFormatter.ISO_LOCAL_DATE); 7091 } 7092 7093 /** 7094 * Converts the specified object to an ISO8601 date-time string. 7095 * 7096 * @param c The object to convert. 7097 * @return The converted object. 7098 */ 7099 public static String toIsoDateTime(Calendar c) { 7100 if (c == null) { 7101 return null; 7102 } 7103 // Convert Calendar to ZonedDateTime and format as ISO8601 date-time with timezone 7104 ZonedDateTime zdt = c.toInstant().atZone(c.getTimeZone().toZoneId()); 7105 return zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); 7106 } 7107 7108 /** 7109 * Converts the specified bytes into a readable string. 7110 * 7111 * @param b The number to convert to hex. 7112 * @return A <code><jk>char</jk>[2]</code> containing the specified characters. 7113 */ 7114 public static String toReadableBytes(byte[] b) { 7115 var sb = new StringBuilder(); 7116 for (var b2 : b) 7117 sb.append((b2 < ' ' || b2 > 'z') ? String.format("[%02X]", b2) : (char)b2 + " "); 7118 sb.append("\n"); 7119 for (var b2 : b) 7120 sb.append(String.format("[%02X]", b2)); 7121 return sb.toString(); 7122 } 7123 7124 /** 7125 * Same as {@link #toHex(byte[])} but puts spaces between the byte strings. 7126 * 7127 * @param bytes The bytes to convert to hexadecimal. 7128 * @return A new string consisting of hexadecimal characters. 
7129 */ 7130 public static String toSpacedHex(byte[] bytes) { 7131 var sb = new StringBuilder(bytes.length * 3); 7132 for (var j = 0; j < bytes.length; j++) { 7133 if (j > 0) 7134 sb.append(' '); 7135 var v = bytes[j] & 0xFF; 7136 sb.append(HEX[v >>> 4]).append(HEX[v & 0x0F]); 7137 } 7138 return sb.toString(); 7139 } 7140 7141 /** 7142 * Safely converts an object to a string, returning <jk>null</jk> if the object is <jk>null</jk>. 7143 * 7144 * @param obj The object to convert to a string. 7145 * @return The string representation of the object, or <jk>null</jk> if the object is <jk>null</jk>. 7146 */ 7147 public static String toString(Object obj) { 7148 return obj == null ? null : obj.toString(); 7149 } 7150 7151 /** 7152 * Safely converts an object to a string, returning the default string if the object is <jk>null</jk>. 7153 * 7154 * @param obj The object to convert to a string. 7155 * @param defaultStr The default string to return if the object is <jk>null</jk>. 7156 * @return The string representation of the object, or the default string if the object is <jk>null</jk>. 7157 */ 7158 public static String toString(Object obj, String defaultStr) { 7159 return obj == null ? defaultStr : obj.toString(); 7160 } 7161 7162 /** 7163 * Converts a collection of strings to a string array. 7164 * 7165 * <p> 7166 * Returns <jk>null</jk> if the collection is <jk>null</jk>. 7167 * Returns an empty array if the collection is empty. 7168 * 7169 * @param collection The collection to convert. Can be <jk>null</jk>. 7170 * @return A new string array containing the collection elements, or <jk>null</jk> if the collection was <jk>null</jk>. 7171 */ 7172 public static String[] toStringArray(Collection<String> collection) { 7173 if (collection == null) 7174 return null; // NOSONAR - Intentional. 7175 return collection.toArray(new String[collection.size()]); 7176 } 7177 7178 /** 7179 * Converts the specified object to a URI. 7180 * 7181 * @param o The object to convert to a URI. 
7182 * @return A new URI, or the same object if the object was already a URI, or 7183 */ 7184 public static URI toUri(Object o) { 7185 if (o == null || o instanceof URI) 7186 return (URI)o; 7187 try { 7188 return new URI(o.toString()); 7189 } catch (URISyntaxException e) { 7190 throw toRex(e); 7191 } 7192 } 7193 7194 /** 7195 * Converts the specified byte array to a UTF-8 string. 7196 * 7197 * @param b The byte array to convert. 7198 * @return The UTF-8 string representation, or <jk>null</jk> if the array is <jk>null</jk>. 7199 */ 7200 public static String toUtf8(byte[] b) { 7201 return b == null ? null : new String(b, UTF8); 7202 } 7203 7204 /** 7205 * Converts the contents of the specified input stream to a UTF-8 string. 7206 * 7207 * @param is The input stream to convert. 7208 * @return The UTF-8 string representation of the input stream contents, or <jk>null</jk> if the stream is <jk>null</jk>. 7209 */ 7210 public static String toUtf8(InputStream is) { 7211 return safe(() -> is == null ? null : new String(readBytes(is), UTF8)); 7212 } 7213 7214 /** 7215 * Transliterates characters in a string by mapping characters from one set to another. 7216 * 7217 * <p> 7218 * Performs character-by-character translation. If a character is found in <c>fromChars</c>, 7219 * it is replaced with the corresponding character at the same position in <c>toChars</c>. 7220 * Characters not found in <c>fromChars</c> are left unchanged. 7221 * 7222 * <h5 class='section'>Example:</h5> 7223 * <p class='bjava'> 7224 * transliterate(<js>"hello"</js>, <js>"aeiou"</js>, <js>"12345"</js>); 7225 * <jc>// "h2ll4"</jc> 7226 * transliterate(<js>"ABC"</js>, <js>"ABC"</js>, <js>"XYZ"</js>); 7227 * <jc>// "XYZ"</jc> 7228 * </p> 7229 * 7230 * @param str The string to transliterate. Can be <jk>null</jk>. 7231 * @param fromChars The source character set. Can be <jk>null</jk>. 7232 * @param toChars The target character set. Can be <jk>null</jk>. 
7233 * @return The transliterated string, or <jk>null</jk> if input is <jk>null</jk>. 7234 * @throws IllegalArgumentException If <c>fromChars</c> and <c>toChars</c> have different lengths. 7235 */ 7236 public static String transliterate(String str, String fromChars, String toChars) { 7237 if (str == null) 7238 return null; 7239 if (fromChars == null || toChars == null || fromChars.isEmpty() || toChars.isEmpty()) 7240 return str; 7241 if (fromChars.length() != toChars.length()) 7242 throw new IllegalArgumentException("fromChars and toChars must have the same length"); 7243 7244 var sb = new StringBuilder(str.length()); 7245 for (var i = 0; i < str.length(); i++) { 7246 var c = str.charAt(i); 7247 var index = fromChars.indexOf(c); 7248 if (index >= 0) 7249 sb.append(toChars.charAt(index)); 7250 else 7251 sb.append(c); 7252 } 7253 return sb.toString(); 7254 } 7255 7256 /** 7257 * Same as {@link String#trim()} but prevents <c>NullPointerExceptions</c>. 7258 * 7259 * @param s The string to trim. 7260 * @return The trimmed string, or <jk>null</jk> if the string was <jk>null</jk>. 7261 */ 7262 public static String trim(String s) { 7263 if (s == null) 7264 return null; 7265 return s.trim(); 7266 } 7267 7268 /** 7269 * Trims whitespace characters from the end of the specified string. 7270 * 7271 * @param s The string to trim. 7272 * @return The trimmed string, or <jk>null</jk> if the string was <jk>null</jk>. 7273 */ 7274 public static String trimEnd(String s) { 7275 if (nn(s)) 7276 while (ne(s) && isWhitespace(s.charAt(s.length() - 1))) 7277 s = s.substring(0, s.length() - 1); 7278 return s; 7279 } 7280 7281 /** 7282 * Trims <js>'/'</js> characters from the beginning of the specified string. 7283 * 7284 * @param s The string to trim. 7285 * @return A new trimmed string, or the same string if no trimming was necessary. 
7286 */ 7287 public static String trimLeadingSlashes(String s) { 7288 if (s == null) 7289 return null; 7290 while (ne(s) && s.charAt(0) == '/') 7291 s = s.substring(1); 7292 return s; 7293 } 7294 7295 /** 7296 * Trims <js>'/'</js> characters from both the start and end of the specified string. 7297 * 7298 * @param s The string to trim. 7299 * @return A new trimmed string, or the same string if no trimming was necessary. 7300 */ 7301 public static String trimSlashes(String s) { 7302 if (s == null) 7303 return null; 7304 if (s.isEmpty()) 7305 return s; 7306 while (endsWith(s, '/')) 7307 s = s.substring(0, s.length() - 1); 7308 while (ne(s) && s.charAt(0) == '/') // NOSONAR - NPE not possible here. 7309 s = s.substring(1); 7310 return s; 7311 } 7312 7313 /** 7314 * Trims <js>'/'</js> and space characters from both the start and end of the specified string. 7315 * 7316 * @param s The string to trim. 7317 * @return A new trimmed string, or the same string if no trimming was necessary. 7318 */ 7319 public static String trimSlashesAndSpaces(String s) { 7320 if (s == null) 7321 return null; 7322 while (ne(s) && (s.charAt(s.length() - 1) == '/' || isWhitespace(s.charAt(s.length() - 1)))) 7323 s = s.substring(0, s.length() - 1); 7324 while (ne(s) && (s.charAt(0) == '/' || isWhitespace(s.charAt(0)))) 7325 s = s.substring(1); 7326 return s; 7327 } 7328 7329 /** 7330 * Trims whitespace characters from the beginning of the specified string. 7331 * 7332 * @param s The string to trim. 7333 * @return The trimmed string, or <jk>null</jk> if the string was <jk>null</jk>. 7334 */ 7335 public static String trimStart(String s) { 7336 if (nn(s)) 7337 while (ne(s) && isWhitespace(s.charAt(0))) 7338 s = s.substring(1); 7339 return s; 7340 } 7341 7342 /** 7343 * Trims <js>'/'</js> characters from the end of the specified string. 7344 * 7345 * @param s The string to trim. 7346 * @return A new trimmed string, or the same string if no trimming was necessary. 
7347 */ 7348 public static String trimTrailingSlashes(String s) { 7349 if (s == null) 7350 return null; 7351 while (endsWith(s, '/')) 7352 s = s.substring(0, s.length() - 1); 7353 return s; 7354 } 7355 7356 /** 7357 * Uncapitalizes the first character of a string. 7358 * 7359 * <h5 class='section'>Example:</h5> 7360 * <p class='bjava'> 7361 * uncapitalize(<jk>null</jk>); <jc>// null</jc> 7362 * uncapitalize(<js>""</js>); <jc>// ""</jc> 7363 * uncapitalize(<js>"Hello"</js>); <jc>// "hello"</jc> 7364 * uncapitalize(<js>"hello"</js>); <jc>// "hello"</jc> 7365 * uncapitalize(<js>"HELLO"</js>); <jc>// "hELLO"</jc> 7366 * </p> 7367 * 7368 * @param str The string to uncapitalize. 7369 * @return The string with the first character uncapitalized, or <jk>null</jk> if input is <jk>null</jk>. 7370 */ 7371 public static String uncapitalize(String str) { 7372 if (isEmpty(str)) 7373 return str; 7374 return Character.toLowerCase(str.charAt(0)) + str.substring(1); 7375 } 7376 7377 /** 7378 * Removes escape characters from the specified characters. 7379 * 7380 * @param s The string to remove escape characters from. 7381 * @param escaped The characters escaped. 7382 * @return A new string if characters were removed, or the same string if not or if the input was <jk>null</jk>. 7383 */ 7384 public static String unescapeChars(String s, AsciiSet escaped) { 7385 if (s == null || s.isEmpty()) 7386 return s; 7387 var count = 0; 7388 for (var i = 0; i < s.length(); i++) 7389 if (escaped.contains(s.charAt(i))) 7390 count++; 7391 if (count == 0) 7392 return s; 7393 var sb = new StringBuffer(s.length() - count); 7394 for (var i = 0; i < s.length(); i++) { 7395 var c = s.charAt(i); 7396 7397 if (c == '\\') { 7398 if (i + 1 != s.length()) { // NOSONAR - Intentional. 7399 var c2 = s.charAt(i + 1); 7400 if (escaped.contains(c2)) { 7401 i++; // NOSONAR - Intentional. 
7402 } 7403 } 7404 } 7405 sb.append(s.charAt(i)); 7406 } 7407 return sb.toString(); 7408 } 7409 7410 /** 7411 * Unescapes HTML entities in a string. 7412 * 7413 * <p> 7414 * Unescapes the following HTML entities: 7415 * <ul> 7416 * <li><js>"&amp;"</js> → <js>'&'</js></li> 7417 * <li><js>"&lt;"</js> → <js>'<'</js></li> 7418 * <li><js>"&gt;"</js> → <js>'>'</js></li> 7419 * <li><js>"&quot;"</js> → <js>'"'</js></li> 7420 * <li><js>"&#39;"</js> or <js>"&apos;"</js> → <js>'\''</js></li> 7421 * </ul> 7422 * 7423 * <h5 class='section'>Example:</h5> 7424 * <p class='bjava'> 7425 * unescapeHtml(<js>"&lt;script&gt;"</js>); <jc>// Returns: "<script>"</jc> 7426 * </p> 7427 * 7428 * @param str The string to unescape. 7429 * @return The unescaped string, or <jk>null</jk> if input is <jk>null</jk>. 7430 */ 7431 public static String unescapeHtml(String str) { 7432 if (str == null) 7433 return null; 7434 // Must unescape & last to avoid interfering with other entities 7435 return str.replace("<", "<").replace(">", ">").replace(""", "\"").replace("'", "'").replace("'", "'").replace("&", "&"); 7436 } 7437 7438 /** 7439 * Unescapes XML entities in a string. 7440 * 7441 * <p> 7442 * Unescapes the following XML entities: 7443 * <ul> 7444 * <li><js>"&amp;"</js> → <js>'&'</js></li> 7445 * <li><js>"&lt;"</js> → <js>'<'</js></li> 7446 * <li><js>"&gt;"</js> → <js>'>'</js></li> 7447 * <li><js>"&quot;"</js> → <js>'"'</js></li> 7448 * <li><js>"&apos;"</js> → <js>'\''</js></li> 7449 * </ul> 7450 * 7451 * <h5 class='section'>Example:</h5> 7452 * <p class='bjava'> 7453 * unescapeXml(<js>"&lt;tag&gt;"</js>); <jc>// Returns: "<tag>"</jc> 7454 * </p> 7455 * 7456 * @param str The string to unescape. 7457 * @return The unescaped string, or <jk>null</jk> if input is <jk>null</jk>. 
7458 */ 7459 public static String unescapeXml(String str) { 7460 if (str == null) 7461 return null; 7462 // Must unescape & last to avoid interfering with other entities 7463 return str.replace("<", "<").replace(">", ">").replace(""", "\"").replace("'", "'").replace("&", "&"); 7464 } 7465 7466 /** 7467 * Creates an escaped-unicode sequence (e.g. <js>"\\u1234"</js>) for the specified character. 7468 * 7469 * @param c The character to create a sequence for. 7470 * @return An escaped-unicode sequence. 7471 */ 7472 public static String unicodeSequence(char c) { 7473 var sb = new StringBuilder(6); 7474 sb.append('\\').append('u'); 7475 for (var cc : toHex4(c)) 7476 sb.append(cc); 7477 return sb.toString(); 7478 } 7479 7480 /** 7481 * Null-safe convenience method for {@link String#toUpperCase()}. 7482 * 7483 * <p> 7484 * Converts the string to uppercase if not null. 7485 * 7486 * @param s The string to convert. 7487 * @return The uppercase string, or <jk>null</jk> if the input was <jk>null</jk>. 7488 * @see #lowerCase(String) 7489 * @see Utils#uc(String) 7490 */ 7491 public static String upperCase(String s) { 7492 return s == null ? null : s.toUpperCase(); 7493 } 7494 7495 /** 7496 * Decodes a <c>application/x-www-form-urlencoded</c> string using <c>UTF-8</c> encoding scheme. 7497 * 7498 * @param s The string to decode. 7499 * @return The decoded string, or <jk>null</jk> if input is <jk>null</jk>. 7500 */ 7501 public static String urlDecode(String s) { 7502 7503 if (s == null) 7504 return s; 7505 7506 var needsDecode = false; 7507 for (var i = 0; i < s.length() && ! needsDecode; i++) { 7508 var c = s.charAt(i); 7509 if (c == '+' || c == '%') 7510 needsDecode = true; 7511 } 7512 7513 if (needsDecode) { 7514 return safe(()->URLDecoder.decode(s, "UTF-8")); 7515 } 7516 return s; 7517 } 7518 7519 /** 7520 * Encodes a <c>application/x-www-form-urlencoded</c> string using <c>UTF-8</c> encoding scheme. 7521 * 7522 * @param s The string to encode. 
7523 * @return The encoded string, or <jk>null</jk> if input is <jk>null</jk>. 7524 */ 7525 public static String urlEncode(String s) { 7526 7527 if (s == null) 7528 return null; 7529 7530 var needsEncode = false; 7531 7532 for (var i = 0; i < s.length() && ! needsEncode; i++) 7533 needsEncode |= (! URL_UNENCODED_CHARS.contains(s.charAt(i))); 7534 7535 if (needsEncode) { 7536 return safe(()->URLEncoder.encode(s, "UTF-8")); 7537 } 7538 7539 return s; 7540 } 7541 7542 /** 7543 * Same as {@link #urlEncode(String)} except only escapes characters that absolutely need to be escaped. 7544 * 7545 * @param s The string to escape. 7546 * @return The encoded string, or <jk>null</jk> if input is <jk>null</jk>. 7547 */ 7548 public static String urlEncodeLax(String s) { 7549 if (s == null) 7550 return null; 7551 var needsEncode = false; 7552 for (var i = 0; i < s.length() && ! needsEncode; i++) 7553 needsEncode |= (! URL_UNENCODED_LAX_CHARS.contains(s.charAt(i))); 7554 if (needsEncode) { 7555 var sb = new StringBuilder(s.length() * 2); 7556 for (var i = 0; i < s.length(); i++) { 7557 var c = s.charAt(i); 7558 if (URL_UNENCODED_LAX_CHARS.contains(c)) 7559 sb.append(c); 7560 else if (c == ' ') 7561 sb.append("+"); 7562 else if (c <= 127) 7563 sb.append('%').append(toHex2(c)); 7564 else 7565 safe(()->sb.append(URLEncoder.encode("" + c, "UTF-8"))); // Yuck. 7566 } 7567 s = sb.toString(); 7568 } 7569 return s; 7570 } 7571 7572 /** 7573 * Similar to {@link URLEncoder#encode(String, String)} but doesn't encode <js>"/"</js> characters. 7574 * 7575 * @param o The object to encode. 7576 * @return The URL encoded string, or <jk>null</jk> if the object was null. 7577 */ 7578 public static String urlEncodePath(Object o) { 7579 7580 if (o == null) 7581 return null; 7582 7583 var s = s(o); 7584 7585 var needsEncode = false; 7586 for (var i = 0; i < s.length() && ! needsEncode; i++) 7587 needsEncode = URL_ENCODE_PATHINFO_VALIDCHARS.contains(s.charAt(i)); 7588 if (! 
needsEncode) 7589 return s; 7590 7591 var sb = new StringBuilder(); 7592 var caw = new CharArrayWriter(); 7593 var caseDiff = ('a' - 'A'); 7594 7595 for (var i = 0; i < s.length();) { 7596 var c = s.charAt(i); 7597 if (URL_ENCODE_PATHINFO_VALIDCHARS.contains(c)) { 7598 sb.append(c); 7599 i++; // NOSONAR - Intentional. 7600 } else { 7601 if (c == ' ') { 7602 sb.append('+'); 7603 i++; // NOSONAR - Intentional. 7604 } else { 7605 do { 7606 caw.write(c); 7607 if (c >= 0xD800 && c <= 0xDBFF) { 7608 if ((i + 1) < s.length()) { // NOSONAR - Intentional. 7609 int d = s.charAt(i + 1); 7610 if (d >= 0xDC00 && d <= 0xDFFF) { 7611 caw.write(d); 7612 i++; // NOSONAR - Intentional. 7613 } 7614 } 7615 } 7616 i++; // NOSONAR - Intentional. 7617 } while (i < s.length() && ! URL_ENCODE_PATHINFO_VALIDCHARS.contains((c = s.charAt(i)))); // NOSONAR - Intentional. 7618 7619 caw.flush(); 7620 var s2 = new String(caw.toCharArray()); 7621 var ba = s2.getBytes(UTF8); 7622 for (var element : ba) { 7623 sb.append('%'); 7624 var ch = forDigit((element >> 4) & 0xF, 16); 7625 if (isLetter(ch)) { 7626 ch -= caseDiff; 7627 } 7628 sb.append(ch); 7629 ch = forDigit(element & 0xF, 16); 7630 if (isLetter(ch)) { 7631 ch -= caseDiff; 7632 } 7633 sb.append(ch); 7634 } 7635 caw.reset(); 7636 } 7637 } 7638 } 7639 return sb.toString(); 7640 } 7641 7642 /** 7643 * Counts the number of words in a string. 7644 * 7645 * <p> 7646 * A word is defined as a sequence of one or more word characters (letters, digits, underscores) 7647 * separated by non-word characters. 7648 * 7649 * <h5 class='section'>Example:</h5> 7650 * <p class='bjava'> 7651 * wordCount(<js>"Hello world"</js>); <jc>// 2</jc> 7652 * wordCount(<js>"The quick brown fox"</js>); <jc>// 4</jc> 7653 * wordCount(<js>"Hello, world! How are you?"</js>); <jc>// 5</jc> 7654 * </p> 7655 * 7656 * @param str The string to count words in. Can be <jk>null</jk>. 7657 * @return The number of words, or <c>0</c> if the string is <jk>null</jk> or empty. 
7658 */ 7659 public static int wordCount(String str) { 7660 if (isEmpty(str)) 7661 return 0; 7662 7663 var count = 0; 7664 var inWord = false; 7665 7666 for (var i = 0; i < str.length(); i++) { 7667 var c = str.charAt(i); 7668 if ((LETTER.contains(c) || DIGIT.contains(c)) || c == '_') { 7669 if (! inWord) { 7670 count++; 7671 inWord = true; 7672 } 7673 } else { 7674 inWord = false; 7675 } 7676 } 7677 7678 return count; 7679 } 7680 7681 /** 7682 * Wraps text to a specified line length. 7683 * 7684 * <p> 7685 * Wraps text by breaking at word boundaries (spaces). Words longer than the wrap length 7686 * will be broken at the wrap length. Existing newlines are preserved. 7687 * 7688 * <h5 class='section'>Example:</h5> 7689 * <p class='bjava'> 7690 * wrap(<js>"hello world test"</js>, 10); <jc>// "hello world\ntest"</jc> 7691 * wrap(<jk>null</jk>, 10); <jc>// null</jc> 7692 * </p> 7693 * 7694 * @param str The string to wrap. 7695 * @param wrapLength The maximum line length (must be > 0). 7696 * @return The wrapped string, or <jk>null</jk> if input is <jk>null</jk>. 7697 * @throws IllegalArgumentException if wrapLength is <= 0. 7698 */ 7699 public static String wrap(String str, int wrapLength) { 7700 return wrap(str, wrapLength, "\n"); 7701 } 7702 7703 /** 7704 * Wraps text to a specified line length with a custom newline string. 7705 * 7706 * <p> 7707 * Wraps text by breaking at word boundaries (spaces). Words longer than the wrap length 7708 * will be broken at the wrap length. Existing newlines are preserved. 7709 * 7710 * <h5 class='section'>Example:</h5> 7711 * <p class='bjava'> 7712 * wrap(<js>"hello world test"</js>, 10, <js>"<br>"</js>); <jc>// "hello world<br>test"</jc> 7713 * wrap(<jk>null</jk>, 10, <js>"\n"</js>); <jc>// null</jc> 7714 * </p> 7715 * 7716 * @param str The string to wrap. 7717 * @param wrapLength The maximum line length (must be > 0). 7718 * @param newline The string to use as line separator. 
7719 * @return The wrapped string, or <jk>null</jk> if input is <jk>null</jk>. 7720 * @throws IllegalArgumentException if wrapLength is <= 0 or newline is <jk>null</jk>. 7721 */ 7722 public static String wrap(String str, int wrapLength, String newline) { 7723 if (str == null) 7724 return null; 7725 if (isEmpty(str)) 7726 return str; 7727 if (wrapLength <= 0) 7728 throw illegalArg("wrapLength must be > 0: {0}", wrapLength); 7729 if (newline == null) 7730 throw illegalArg("newline cannot be null"); 7731 7732 var result = new StringBuilder(); 7733 var lines = str.split("\r?\n", -1); // Preserve empty lines 7734 7735 for (var lineIdx = 0; lineIdx < lines.length; lineIdx++) { 7736 var line = lines[lineIdx]; 7737 if (line.isEmpty()) { 7738 if (lineIdx < lines.length - 1) 7739 result.append(newline); 7740 continue; 7741 } 7742 7743 // Split into words first, then combine words that fit 7744 var words = line.split(" +"); // Split on one or more spaces 7745 var currentLine = new StringBuilder(); 7746 7747 for (var word : words) { 7748 if (word.isEmpty()) 7749 continue; 7750 7751 var wordLength = word.length(); 7752 var currentLength = currentLine.length(); 7753 7754 if (currentLength == 0) { 7755 // First word on line 7756 // Only break single words if there are multiple words in the input 7757 // (single long words should not be broken for readability) 7758 if (wordLength > wrapLength && words.length > 1) { 7759 // Word is too long and there are other words, break it 7760 if (result.length() > 0) 7761 result.append(newline); 7762 var wordPos = 0; 7763 while (wordPos < wordLength) { 7764 if (wordPos > 0) 7765 result.append(newline); 7766 var remaining = wordLength - wordPos; 7767 if (remaining <= wrapLength) { 7768 result.append(word.substring(wordPos)); 7769 break; 7770 } 7771 result.append(word.substring(wordPos, wordPos + wrapLength)); 7772 wordPos += wrapLength; 7773 } 7774 } else { 7775 currentLine.append(word); 7776 } 7777 } else { 7778 // Check if we can add this 
word to current line 7779 var neededLength = currentLength + 1 + wordLength; // current + space + word 7780 // Break if it would fit exactly or exceed - prefer breaking for readability 7781 if (neededLength < wrapLength) { 7782 // Fits with room to spare 7783 currentLine.append(' ').append(word); 7784 } else { 7785 // Doesn't fit or fits exactly - start new line 7786 if (result.length() > 0) 7787 result.append(newline); 7788 result.append(currentLine); 7789 currentLine.setLength(0); 7790 if (wordLength > wrapLength && words.length > 1) { 7791 // Word is too long and there are other words, break it 7792 result.append(newline); 7793 var wordPos = 0; 7794 while (wordPos < wordLength) { 7795 if (wordPos > 0) 7796 result.append(newline); 7797 var remaining = wordLength - wordPos; 7798 if (remaining <= wrapLength) { 7799 result.append(word.substring(wordPos)); 7800 break; 7801 } 7802 result.append(word.substring(wordPos, wordPos + wrapLength)); 7803 wordPos += wrapLength; 7804 } 7805 } else { 7806 currentLine.append(word); 7807 } 7808 } 7809 } 7810 } 7811 7812 // Append any remaining line 7813 if (currentLine.length() > 0) { 7814 if (result.length() > 0) 7815 result.append(newline); 7816 result.append(currentLine); 7817 } 7818 } 7819 7820 return result.toString(); 7821 } 7822 7823 /** 7824 * Helper method to estimate the number of syllables in a word. 7825 */ 7826 private static int estimateSyllables(String word) { 7827 7828 var lower = word.toLowerCase(); 7829 var count = 0; 7830 var prevWasVowel = false; 7831 7832 for (var i = 0; i < lower.length(); i++) { 7833 var c = lower.charAt(i); 7834 var isVowel = (VOWEL.contains(c) || c == 'y'); 7835 7836 if (isVowel && ! 
prevWasVowel) { 7837 count++; 7838 } 7839 prevWasVowel = isVowel; 7840 } 7841 7842 // Handle silent 'e' at the end 7843 if (lower.endsWith("e") && count > 1) { 7844 count--; 7845 } 7846 7847 // At least one syllable 7848 return Math.max(1, count); 7849 } 7850 7851 /** 7852 * Finds the first non-whitespace, non-comment character in a string. 7853 * 7854 * @param s The string to analyze. 7855 * @return The first real character, or <c>-1</c> if none found. 7856 */ 7857 private static int firstRealCharacter(String s) { 7858 return safe(()-> { 7859 var r = new StringReader(s); 7860 var c = 0; 7861 while ((c = r.read()) != -1) { 7862 if (! isWhitespace(c)) { 7863 if (c == '/') { 7864 skipComments(r); 7865 } else { 7866 return c; 7867 } 7868 } 7869 } 7870 return -1; 7871 }); 7872 } 7873 7874 /** 7875 * Helper method to get Soundex code for a character. 7876 */ 7877 private static char getSoundexCode(char c) { 7878 if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U' || c == 'H' || c == 'W' || c == 'Y') 7879 return '0'; 7880 if (c == 'B' || c == 'F' || c == 'P' || c == 'V') 7881 return '1'; 7882 if (c == 'C' || c == 'G' || c == 'J' || c == 'K' || c == 'Q' || c == 'S' || c == 'X' || c == 'Z') 7883 return '2'; 7884 if (c == 'D' || c == 'T') 7885 return '3'; 7886 if (c == 'L') 7887 return '4'; 7888 if (c == 'M' || c == 'N') 7889 return '5'; 7890 if (c == 'R') 7891 return '6'; 7892 return '0'; // Non-letter characters 7893 } 7894 7895 /** 7896 * Validates if a string is a valid IPv6 address format (without network operations). 7897 * 7898 * <p> 7899 * This method performs pure string-based validation and does not perform any DNS lookups 7900 * or network operations, making it fast and suitable for validation purposes. 7901 * 7902 * @param ip The IPv6 address string to validate. 7903 * @return <jk>true</jk> if the string is a valid IPv6 address format, <jk>false</jk> otherwise. 
7904 */ 7905 public static boolean isValidIPv6Address(String ip) { 7906 if (ip == null || ip.isEmpty()) 7907 return false; 7908 7909 // IPv6 addresses can be: 7910 // 1. Full format: 2001:0db8:85a3:0000:0000:8a2e:0370:7334 (8 groups of 4 hex digits) 7911 // 2. Compressed format: 2001:db8::1 (uses :: to represent consecutive zeros) 7912 // 3. IPv4-mapped: ::ffff:192.168.1.1 (last 32 bits as IPv4) 7913 // 4. Loopback: ::1 7914 // 5. Unspecified: :: 7915 7916 // Cannot start or end with a single colon (except ::) 7917 if (ip.startsWith(":") && !ip.startsWith("::")) 7918 return false; 7919 if (ip.endsWith(":") && !ip.endsWith("::")) 7920 return false; 7921 7922 // Check for IPv4-mapped format (contains both : and .) 7923 if (ip.contains(".")) { 7924 // Must be in format ::ffff:x.x.x.x or similar 7925 var lastColon = ip.lastIndexOf(":"); 7926 if (lastColon < 0) 7927 return false; 7928 var ipv4Part = ip.substring(lastColon + 1); 7929 // Validate IPv4 part 7930 var ipv4Parts = ipv4Part.split("\\."); 7931 if (ipv4Parts.length != 4) 7932 return false; 7933 for (var part : ipv4Parts) { 7934 try { 7935 var num = Integer.parseInt(part); 7936 if (num < 0 || num > 255) 7937 return false; 7938 } catch (@SuppressWarnings("unused") NumberFormatException e) { 7939 return false; 7940 } 7941 } 7942 // Validate IPv6 part before the IPv4 7943 var ipv6Part = ip.substring(0, lastColon); 7944 // Accept empty, ::, ::ffff, ::FFFF, or : (when string starts with ::) 7945 if (ipv6Part.isEmpty() || ipv6Part.equals("::") || ipv6Part.equals("::ffff") || ipv6Part.equals("::FFFF") || (ipv6Part.equals(":") && ip.startsWith("::"))) 7946 return true; 7947 // More complex validation would be needed for other IPv4-mapped formats 7948 // For now, accept common formats 7949 } 7950 7951 // Check for :: (compression) - only one allowed 7952 var doubleColonCount = 0; 7953 for (var i = 1; i < ip.length(); i++) { 7954 if (ip.charAt(i) == ':' && ip.charAt(i - 1) == ':') { 7955 doubleColonCount++; 7956 if 
(doubleColonCount > 1) 7957 return false; // Only one :: allowed 7958 } 7959 } 7960 7961 // Split by :: 7962 var parts = ip.split("::", -1); 7963 7964 if (parts.length == 2) { 7965 // Compressed format 7966 var leftParts = parts[0].isEmpty() ? new String[0] : parts[0].split(":"); 7967 var rightParts = parts[1].isEmpty() ? new String[0] : parts[1].split(":"); 7968 var totalParts = leftParts.length + rightParts.length; 7969 if (totalParts > 7) 7970 return false; // Too many groups (max 8, but :: counts as one or more) 7971 } else { 7972 // Full format (no compression) 7973 var groups = ip.split(":"); 7974 if (groups.length != 8) 7975 return false; 7976 } 7977 7978 // Validate each hex group 7979 var groups = ip.split("::"); 7980 for (var groupSection : groups) { 7981 if (groupSection.isEmpty()) 7982 continue; // Skip empty section from :: 7983 var groupParts = groupSection.split(":"); 7984 for (var group : groupParts) { 7985 if (group.length() > 4) 7986 return false; // Each group is max 4 hex digits 7987 // Validate hex digits 7988 for (var i = 0; i < group.length(); i++) { 7989 var c = group.charAt(i); 7990 if (! HEXADECIMAL_CHARS.contains(c)) 7991 return false; 7992 } 7993 } 7994 } 7995 7996 return true; 7997 } 7998 7999 8000 private static class Readifier { 8001 private final Class<?> type; 8002 private final Function<Object,String> bridge; 8003 8004 private <T> Readifier(Class<T> type, Function<? super T,String> converter) { 8005 this.type = type; 8006 this.bridge = o -> converter.apply(type.cast(o)); 8007 } 8008 8009 Class<?> forClass() { 8010 return type; 8011 } 8012 8013 Function<Object,String> toFunction() { 8014 return bridge; 8015 } 8016 } 8017 8018 private static <T> Readifier readifier(Class<T> type, Function<? super T,String> converter) { 8019 return new Readifier(type, converter); 8020 } 8021 8022 private static List<Readifier> loadReadifiers() { 8023 var list = new ArrayList<Readifier>(); 8024 8025 // More specific types first - order matters! 
8026 list.add(readifier(Map.Entry.class, x -> readable(x.getKey()) + '=' + readable(x.getValue()))); 8027 list.add(readifier(Collection.class, x -> ((Collection<?>)x).stream().map(StringUtils::readable).collect(joining(",", "[", "]")))); 8028 list.add(readifier(Map.class, x -> ((Map<?,?>)x).entrySet().stream().map(StringUtils::readable).collect(joining(",", "{", "}")))); 8029 list.add(readifier(Iterable.class, x -> readable(toList(x)))); 8030 list.add(readifier(Iterator.class, x -> readable(toList(x)))); 8031 list.add(readifier(Enumeration.class, x -> readable(toList(x)))); 8032 list.add(readifier(Optional.class, x -> readable(((Optional<?>)x).orElse(null)))); 8033 list.add(readifier(GregorianCalendar.class, x -> x.toZonedDateTime().format(DateTimeFormatter.ISO_INSTANT))); 8034 list.add(readifier(Date.class, x -> x.toInstant().toString())); 8035 list.add(readifier(InputStream.class, x -> toHex(x))); 8036 list.add(readifier(Reader.class, (Reader x) -> safe(() -> read(x)))); 8037 list.add(readifier(File.class, (File x) -> safe(() -> read(x)))); 8038 list.add(readifier(byte[].class, x -> toHex(x))); 8039 list.add(readifier(Enum.class, x -> ((Enum<?>)x).name())); 8040 list.add(readifier(Class.class, x -> cns(x))); 8041 list.add(readifier(Constructor.class, x -> ConstructorInfo.of(x).getFullName())); 8042 list.add(readifier(Method.class, x -> MethodInfo.of(x).getFullName())); 8043 list.add(readifier(Field.class, x -> FieldInfo.of(x).toString())); 8044 list.add(readifier(Parameter.class, x -> ParameterInfo.of(x).toString())); 8045 list.add(readifier(ClassInfo.class, ClassInfo::toString)); 8046 list.add(readifier(MethodInfo.class, MethodInfo::toString)); 8047 list.add(readifier(ConstructorInfo.class, ConstructorInfo::toString)); 8048 list.add(readifier(FieldInfo.class, FieldInfo::toString)); 8049 list.add(readifier(ParameterInfo.class, ParameterInfo::toString)); 8050 8051 return Collections.unmodifiableList(list); 8052 } 8053 8054 /** 8055 * Determines the multiplier 
value based on the suffix character in a string. 8056 * 8057 * @param s The string to analyze for multiplier suffix. 8058 * @return The multiplier value (1 if no valid suffix found). 8059 */ 8060 private static int multiplierInt(String s) { 8061 var c = isEmpty(s) ? 'z' : s.charAt(s.length() - 1); 8062 if (c == 'G') 8063 return 1024 * 1024 * 1024; 8064 if (c == 'M') 8065 return 1024 * 1024; 8066 if (c == 'K') 8067 return 1024; 8068 if (c == 'g') 8069 return 1000 * 1000 * 1000; 8070 if (c == 'm') 8071 return 1000 * 1000; 8072 if (c == 'k') 8073 return 1000; 8074 return 1; 8075 } 8076 8077 /** 8078 * Determines the long multiplier value based on the suffix character in a string. 8079 * 8080 * @param s The string to analyze for multiplier suffix. 8081 * @return The multiplier value (1 if no valid suffix found). 8082 */ 8083 private static long multiplierLong(String s) { 8084 if (isEmpty(s)) 8085 return 1; 8086 var c = s.charAt(s.length() - 1); 8087 if (c == 'P') 8088 return 1125899906842624L; // 1024^5 8089 if (c == 'T') 8090 return 1099511627776L; // 1024^4 8091 if (c == 'G') 8092 return 1073741824L; // 1024^3 8093 if (c == 'M') 8094 return 1048576L; // 1024^2 8095 if (c == 'K') 8096 return 1024L; 8097 if (c == 'p') 8098 return 1000000000000000L; // 1000^5 8099 if (c == 't') 8100 return 1000000000000L; // 1000^4 8101 if (c == 'g') 8102 return 1000000000L; // 1000^3 8103 if (c == 'm') 8104 return 1000000L; // 1000^2 8105 if (c == 'k') 8106 return 1000L; 8107 return 1; 8108 } 8109 8110 /** 8111 * Parses a unit string and converts the value to milliseconds. 8112 * 8113 * @param unit The unit string (case-insensitive, already lowercased). 8114 * @param value The numeric value. 8115 * @return The value in milliseconds, or <c>-1</c> if the unit is invalid. 
8116 */ 8117 private static long parseUnit(String unit, double value) { 8118 if (isEmpty(unit)) { 8119 // No unit means milliseconds 8120 return (long)value; 8121 } 8122 8123 // Check milliseconds first (before minutes) - must check exact "ms" before checking "m" 8124 if (unit.equals("ms") || unit.equals("millis") || unit.equals("milliseconds")) 8125 return (long)value; 8126 8127 // Seconds 8128 if (unit.startsWith("s") && !unit.startsWith("sec")) 8129 return (long)(value * 1000); 8130 if (unit.startsWith("sec") || unit.startsWith("second")) 8131 return (long)(value * 1000); 8132 8133 // Minutes (must check after milliseconds and months) 8134 if (unit.startsWith("m") && !unit.startsWith("mo") && !unit.startsWith("mill") && !unit.startsWith("ms")) 8135 return (long)(value * 1000 * 60); 8136 if (unit.startsWith("min") || unit.startsWith("minute")) 8137 return (long)(value * 1000 * 60); 8138 8139 // Hours 8140 if (unit.startsWith("h") || unit.startsWith("hour")) 8141 return (long)(value * 1000 * 60 * 60); 8142 8143 // Days 8144 if (unit.startsWith("d") && !unit.startsWith("da")) 8145 return (long)(value * 1000 * 60 * 60 * 24); 8146 if (unit.startsWith("day")) 8147 return (long)(value * 1000 * 60 * 60 * 24); 8148 8149 // Weeks 8150 if (unit.startsWith("w") || unit.startsWith("week")) 8151 return (long)(value * 1000 * 60 * 60 * 24 * 7); 8152 8153 // Months (30 days) 8154 if (unit.startsWith("mo") || unit.startsWith("month")) 8155 return (long)(value * 1000 * 60 * 60 * 24 * 30); 8156 8157 // Years (365 days) 8158 if (unit.startsWith("y") && !unit.startsWith("yr")) 8159 return (long)(value * 1000 * 60 * 60 * 24 * 365); 8160 if (unit.startsWith("yr") || unit.startsWith("year")) 8161 return (long)(value * 1000 * 60 * 60 * 24 * 365); 8162 8163 // Unknown unit 8164 return -1; 8165 } 8166 8167 /** 8168 * Skips over a single comment sequence in a StringReader. 8169 * 8170 * <p> 8171 * The reader must be positioned at the first <js>'/'</js> character of a comment. 
8172 * This method will skip only the comment it's currently positioned on, not all comments in the reader. 8173 * 8174 * <p> 8175 * Supports both <js>"/* * /"</js> style block comments and <js>"//"</js> style line comments. 8176 * 8177 * @param r The StringReader positioned at the start of a comment (at the first <js>'/'</js>). 8178 * @throws IOException If an I/O error occurs. 8179 */ 8180 public static void skipComments(StringReader r) throws IOException { 8181 var c = r.read(); 8182 // "/* */" style comments 8183 if (c == '*') { 8184 c = r.read(); 8185 while (c != -1) { 8186 if (c == '*') { 8187 c = r.read(); 8188 if (c == '/') 8189 return; 8190 // If not '/', continue checking from this character 8191 // Don't read again, just continue the loop 8192 } else { 8193 c = r.read(); 8194 } 8195 } 8196 // "//" style comments 8197 } else if (c == '/') { 8198 while ((c = r.read()) != -1) { 8199 if (c == '\n') 8200 return; 8201 } 8202 } 8203 } 8204 8205 /** 8206 * Gets or creates an AsciiSet for escaping the specified character. 8207 * 8208 * @param c The character to create an escape set for. 8209 * @return An AsciiSet containing the character and backslash. 8210 */ 8211 private static AsciiSet getEscapeSet(char c) { 8212 return ESCAPE_SETS.computeIfAbsent(c, key -> AsciiSet.create().chars(key, '\\').build()); 8213 } 8214 8215 /** 8216 * Helper method to split a string into words. 8217 * Detects word boundaries from separators (spaces, underscores, hyphens) and case changes. 8218 * 8219 * @param str The string to split. 8220 * @return A list of words, or empty list if input is null or empty. 
8221 */ 8222 private static List<String> splitWords(String str) { 8223 if (str == null || isEmpty(str)) 8224 return Collections.emptyList(); 8225 8226 var words = new ArrayList<String>(); 8227 var sb = new StringBuilder(); 8228 var wasLowerCase = false; 8229 var wasUpperCase = false; 8230 var consecutiveUpperCount = 0; 8231 8232 for (var i = 0; i < str.length(); i++) { 8233 var c = str.charAt(i); 8234 var isSeparator = (c == ' ' || c == '_' || c == '-' || c == '\t'); 8235 var isUpperCase = LETTER_UC.contains(c); 8236 var isLowerCase = LETTER_LC.contains(c); 8237 var isLetter = LETTER.contains(c); 8238 8239 if (isSeparator) { 8240 if (sb.length() > 0) { 8241 words.add(sb.toString()); 8242 sb.setLength(0); 8243 } 8244 wasLowerCase = false; 8245 wasUpperCase = false; 8246 consecutiveUpperCount = 0; 8247 } else if (isLetter) { 8248 // Detect word boundary: 8249 // 1. Uppercase after lowercase (e.g., "helloWorld" → "hello", "World") 8250 // 2. Uppercase after consecutive uppercase when next is lowercase (e.g., "XMLHttp" → "XML", "Http") 8251 // 3. 
Lowercase after 2+ consecutive uppercase (e.g., "XMLHttp" → "XML", "Http") 8252 if (sb.length() > 0) { 8253 if (isUpperCase && wasLowerCase) { 8254 // Case 1: uppercase after lowercase (e.g., "helloWorld" → "hello", "World") 8255 words.add(sb.toString()); 8256 sb.setLength(0); 8257 consecutiveUpperCount = 0; 8258 } else if (isUpperCase && wasUpperCase && consecutiveUpperCount >= 2) { 8259 // Case 2: uppercase after uppercase - check if this starts a new word 8260 // Look ahead to see if next character is lowercase 8261 // This handles "XMLHttp" where 'H' starts "Http" 8262 // We need at least 2 consecutive uppercase letters before this one to split 8263 if (i + 1 < str.length()) { 8264 var nextChar = str.charAt(i + 1); 8265 if (LETTER_LC.contains(nextChar)) { 8266 // This uppercase starts a new word, split before it 8267 words.add(sb.toString()); 8268 sb.setLength(0); 8269 consecutiveUpperCount = 0; 8270 } 8271 } 8272 } else if (isLowerCase && wasUpperCase && consecutiveUpperCount >= 2) { 8273 // Case 3: lowercase after 2+ consecutive uppercase 8274 // Split all but the last uppercase (e.g., "XMLH" → "XML" + "H") 8275 var splitPoint = sb.length() - 1; 8276 words.add(sb.substring(0, splitPoint)); 8277 sb.delete(0, splitPoint); 8278 consecutiveUpperCount = 0; 8279 } 8280 } 8281 sb.append(c); 8282 // Update state AFTER appending 8283 wasLowerCase = isLowerCase; 8284 wasUpperCase = isUpperCase; 8285 if (isUpperCase) { 8286 consecutiveUpperCount++; 8287 } else { 8288 consecutiveUpperCount = 0; 8289 } 8290 } else { 8291 // Non-letter characters (digits, etc.) - treat as part of current word 8292 sb.append(c); 8293 wasLowerCase = false; 8294 wasUpperCase = false; 8295 consecutiveUpperCount = 0; 8296 } 8297 } 8298 8299 if (sb.length() > 0) 8300 words.add(sb.toString()); 8301 8302 return words; 8303 } 8304 8305 /** 8306 * Constructor. 8307 */ 8308 protected StringUtils() {} 8309}