001// *************************************************************************************************************************** 002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file * 003// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * 004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance * 005// * with the License. You may obtain a copy of the License at * 006// * * 007// * http://www.apache.org/licenses/LICENSE-2.0 * 008// * * 009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an * 010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * 011// * specific language governing permissions and limitations under the License. * 012// *************************************************************************************************************************** 013package org.apache.juneau.objecttools; 014 015import static org.apache.juneau.common.internal.StringUtils.*; 016 017import java.util.*; 018import java.util.regex.*; 019 020import org.apache.juneau.*; 021import org.apache.juneau.common.internal.*; 022 023/** 024 * String matcher factory for the {@link ObjectSearcher} class. 025 * 026 * <p> 027 * The class provides searching based on the following patterns: 028 * </p> 029 * <ul> 030 * <li><js>"property=foo"</js> - Simple full word match 031 * <li><js>"property=fo*"</js>, <js>"property=?ar"</js> - Meta-character matching 032 * <li><js>"property=foo bar"</js>(implicit), <js>"property=^foo ^bar"</js>(explicit) - Multiple OR'ed patterns 033 * <li><js>"property=+fo* +*ar"</js> - Multiple AND'ed patterns 034 * <li><js>"property=fo* -bar"</js> - Negative patterns 035 * <li><js>"property='foo bar'"</js> - Patterns with whitespace 036 * <li><js>"property=foo\\'bar"</js> - Patterns with single-quotes 037 * <li><js>"property=/foo\\s+bar"</js> - Regular expression match 038 * </ul> 039 * 040 * <h5 class='section'>See Also:</h5><ul> 041 * <li class='link'><a class="doclink" href="../../../../index.html#jm.ObjectTools">Overview > juneau-marshall > Object Tools</a> 042 * </ul> 043 */ 044public class StringMatcherFactory extends MatcherFactory { 045 046 /** 047 * Default reusable matcher. 048 */ 049 public static final StringMatcherFactory DEFAULT = new StringMatcherFactory(); 050 051 @Override 052 public boolean canMatch(ClassMeta<?> cm) { 053 return true; 054 } 055 056 @Override 057 public AbstractMatcher create(String pattern) { 058 return new StringMatcher(pattern); 059 } 060 061 /** 062 * A construct representing a single search pattern. 063 */ 064 private static class StringMatcher extends AbstractMatcher { 065 private String pattern; 066 private static final AsciiSet 067 META_CHARS = AsciiSet.create("*?'\""), 068 SQ_CHAR = AsciiSet.create("'"), 069 DQ_CHAR = AsciiSet.create("\""), 070 REGEX_CHARS = AsciiSet.create("+\\[]{}()^$."); 071 072 Pattern[] orPatterns, andPatterns, notPatterns; 073 074 public StringMatcher(String searchPattern) { 075 076 this.pattern = searchPattern.trim(); 077 List<Pattern> ors = new LinkedList<>(); 078 List<Pattern> ands = new LinkedList<>(); 079 List<Pattern> nots = new LinkedList<>(); 080 081 for (String s : splitQuoted(pattern, true)) { 082 char c0 = s.charAt(0), c9 = s.charAt(s.length()-1); 083 084 if (c0 == '/' && c9 == '/' && s.length() > 1) { 085 ands.add(Pattern.compile(strip(s))); 086 } else { 087 char prefix = '^'; 088 boolean ignoreCase = false; 089 if (s.length() > 1 && (c0 == '^' || c0 == '+' || c0 == '-')) { 090 prefix = c0; 091 s = s.substring(1); 092 c0 = s.charAt(0); 093 } 094 095 if (c0 == '\'') { 096 s = unEscapeChars(strip(s), SQ_CHAR); 097 ignoreCase = true; 098 } else if (c0 == '"') { 099 s = unEscapeChars(strip(s), DQ_CHAR); 100 } 101 102 if (REGEX_CHARS.contains(s) || META_CHARS.contains(s)) { 103 StringBuilder sb = new StringBuilder(); 104 boolean isInEscape = false; 105 for (int i = 0; i < s.length(); i++) { 106 char c = s.charAt(i); 107 if (isInEscape) { 108 if (c == '?' || c == '*' || c == '\\') 109 sb.append('\\').append(c); 110 else 111 sb.append(c); 112 isInEscape = false; 113 } else { 114 if (c == '\\') 115 isInEscape = true; 116 else if (c == '?') 117 sb.append(".?"); 118 else if (c == '*') 119 sb.append(".*"); 120 else if (REGEX_CHARS.contains(c)) 121 sb.append("\\").append(c); 122 else 123 sb.append(c); 124 } 125 } 126 s = sb.toString(); 127 } 128 129 130 int flags = Pattern.DOTALL; 131 if (ignoreCase) 132 flags |= Pattern.CASE_INSENSITIVE; 133 134 Pattern p = Pattern.compile(s, flags); 135 136 if (prefix == '-') 137 nots.add(p); 138 else if (prefix == '+') 139 ands.add(p); 140 else 141 ors.add(p); 142 } 143 } 144 orPatterns = ors.toArray(new Pattern[ors.size()]); 145 andPatterns = ands.toArray(new Pattern[ands.size()]); 146 notPatterns = nots.toArray(new Pattern[nots.size()]); 147 } 148 149 @Override 150 public boolean matches(ClassMeta<?> cm, Object o) { 151 String s = (String)o; 152 for (int i = 0; i < andPatterns.length; i++) 153 if (! andPatterns[i].matcher(s).matches()) 154 return false; 155 for (int i = 0; i < notPatterns.length; i++) 156 if (notPatterns[i].matcher(s).matches()) 157 return false; 158 for (int i = 0; i < orPatterns.length; i++) 159 if (orPatterns[i].matcher(s).matches()) 160 return true; 161 return orPatterns.length == 0; 162 } 163 164 @Override 165 public String toString() { 166 return pattern; 167 } 168 } 169}