001 /**
002 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
003 */
004 package net.sourceforge.pmd.util;
005
006 import java.util.ArrayList;
007 import java.util.Iterator;
008 import java.util.List;
009
/**
 * Static helpers for testing, splitting, joining, padding and escaping strings.
 * The class holds no state and cannot be instantiated.
 */
public final class StringUtil {

    /** Shared zero-length array returned by the split methods when there is nothing to split. */
    public static final String[] EMPTY_STRINGS = new String[0];

    /**
     * Read once at class-load time from the system property
     * <code>net.sourceforge.pmd.supportUTF8</code>; only the exact value "yes" enables it.
     * When enabled, characters above '~' are written unescaped by
     * {@link #appendXmlEscaped(StringBuffer, String)}.
     */
    private static final boolean SUPPORTS_UTF8 =
            System.getProperty("net.sourceforge.pmd.supportUTF8", "no").equals("yes");

    /** Pre-built decimal character references for the code points 126..255, indexed by (code point - 126). */
    private static final String[] ENTITIES;

    static {
        ENTITIES = new String[256 - 126];
        for (int i = 126; i <= 255; i++) {
            ENTITIES[i - 126] = "&#" + i + ';';
        }
    }

    /** Utility class, not meant to be instantiated. */
    private StringUtil() {
    }

    /**
     * Returns true if the value arg is either null, empty, or made up solely of
     * whitespace characters (as defined by {@link Character#isWhitespace(char)}).
     * Cheaper than calling (string).trim().length() == 0 because no copy is made.
     *
     * @param value the string to test, may be null
     * @return <code>true</code> if the value is blank, <code>false</code> otherwise.
     */
    public static boolean isEmpty(String value) {
        if (value == null) {
            return true;
        }
        for (int i = 0, n = value.length(); i < n; i++) {
            if (!Character.isWhitespace(value.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Compares two strings, treating null, empty and whitespace-only values as
     * interchangeable. Two blank values are equal to each other; a blank value
     * never equals a value containing text; two values containing text are equal
     * only if {@link String#equals(Object)} says so (no trimming is applied).
     *
     * @param a first string, may be null
     * @param b second string, may be null
     * @return <code>true</code> if both are blank or both are identical text
     */
    public static boolean areSemanticEquals(String a, String b) {
        if (isEmpty(a)) {
            return isEmpty(b);
        }
        if (isEmpty(b)) {
            return false;
        }
        return a.equals(b);
    }

    /**
     * Replaces every occurrence of a character with a string.
     *
     * @param original  the text to search, must not be null
     * @param oldChar   the character to replace
     * @param newString the replacement; null is treated as the empty string
     * @return the original instance if the character does not occur, otherwise a new string
     */
    public static String replaceString(final String original, char oldChar, final String newString) {
        int index = original.indexOf(oldChar);
        if (index < 0) {
            return original;
        }

        final String replace = newString == null ? "" : newString;
        final StringBuilder buf = new StringBuilder(Math.max(16, original.length() + replace.length()));
        int last = 0;
        while (index != -1) {
            buf.append(original, last, index).append(replace);
            last = index + 1;
            index = original.indexOf(oldChar, last);
        }
        buf.append(original, last, original.length());
        return buf.toString();
    }

    /**
     * Replaces every non-overlapping occurrence of a substring with another string,
     * scanning left to right. The search string is matched literally, not as a regex.
     *
     * @param original  the text to search, must not be null
     * @param oldString the literal text to replace, must not be null
     * @param newString the replacement; null is treated as the empty string
     * @return the original instance if nothing was replaced, otherwise a new string
     */
    public static String replaceString(final String original, final String oldString, final String newString) {
        // An empty search string matches at every index without ever advancing the
        // scan position, which would loop forever; there is nothing to replace.
        if (oldString.length() == 0) {
            return original;
        }

        int index = original.indexOf(oldString);
        if (index < 0) {
            return original;
        }

        final String replace = newString == null ? "" : newString;
        final StringBuilder buf = new StringBuilder(Math.max(16, original.length() + replace.length()));
        final int step = oldString.length();
        int last = 0;
        while (index != -1) {
            buf.append(original, last, index).append(replace);
            last = index + step;
            index = original.indexOf(oldString, last);
        }
        buf.append(original, last, original.length());
        return buf.toString();
    }

    /**
     * Appends to a StringBuffer the String src where non-ASCII and
     * XML special chars are escaped. Whether characters above '~' are escaped
     * depends on the <code>net.sourceforge.pmd.supportUTF8</code> system property
     * as read when this class was loaded.
     *
     * @param buf The destination XML stream
     * @param src The String to append to the stream, must not be null
     */
    public static void appendXmlEscaped(StringBuffer buf, String src) {
        appendXmlEscaped(buf, src, SUPPORTS_UTF8);
    }

    /**
     * Escapes the three characters that are significant in HTML text content:
     * <code>&amp;</code>, <code>&lt;</code> and <code>&gt;</code>. Quotes are left untouched.
     *
     * @param string the text to encode, must not be null
     * @return the encoded text (the same instance if nothing needed escaping)
     */
    public static String htmlEncode(String string) {
        // The ampersand has to go first, otherwise the ampersands introduced by
        // the other two replacements would be escaped a second time.
        String encoded = replaceString(string, '&', "&amp;");
        encoded = replaceString(encoded, '<', "&lt;");
        return replaceString(encoded, '>', "&gt;");
    }

    // TODO - unify the method above with the one below

    /**
     * Worker for {@link #appendXmlEscaped(StringBuffer, String)}.
     *
     * @param buf         destination buffer
     * @param src         text to escape
     * @param supportUTF8 if true, characters above '~' are appended as-is; if false
     *                    they are written as numeric character references
     */
    private static void appendXmlEscaped(StringBuffer buf, String src, boolean supportUTF8) {
        final int length = src.length();
        for (int i = 0; i < length; i++) {
            final char c = src.charAt(i);
            if (c > '~') { // above 126
                if (supportUTF8) {
                    buf.append(c);
                } else if (c <= 255) {
                    buf.append(ENTITIES[c - 126]);
                } else if (Character.isHighSurrogate(c)
                        && i + 1 < length
                        && Character.isLowSurrogate(src.charAt(i + 1))) {
                    // A surrogate pair is one character: emit a single reference
                    // to the combined code point and skip the low surrogate.
                    final int codePoint = Character.toCodePoint(c, src.charAt(i + 1));
                    buf.append("&#x").append(Integer.toHexString(codePoint)).append(';');
                    i++;
                } else {
                    buf.append("&#x").append(Integer.toHexString(c)).append(';');
                }
            } else if (c == '&') {
                buf.append("&amp;");
            } else if (c == '"') {
                buf.append("&quot;");
            } else if (c == '<') {
                buf.append("&lt;");
            } else if (c == '>') {
                buf.append("&gt;");
            } else {
                buf.append(c);
            }
        }
    }

    /**
     * Splits the source at every occurrence of the delimiter character. Empty
     * segments are preserved, including leading and trailing ones, so the result
     * always has (number of delimiters + 1) elements for a non-empty source.
     *
     * Note that String.split is not a drop-in replacement: it interprets its
     * argument as a regular expression and discards trailing empty segments.
     *
     * @param source String to split, may be null
     * @param delimiter char
     * @return String[] the segments; {@link #EMPTY_STRINGS} if the source is null or empty
     */
    public static String[] substringsOf(String source, char delimiter) {
        if (source == null || source.length() == 0) {
            return EMPTY_STRINGS;
        }

        final int length = source.length();
        int delimiterCount = 0;
        for (int i = 0; i < length; i++) {
            if (source.charAt(i) == delimiter) {
                delimiterCount++;
            }
        }

        if (delimiterCount == 0) {
            return new String[] { source };
        }

        final String[] results = new String[delimiterCount + 1];
        int offset = 0;
        for (int slot = 0; slot < results.length; slot++) {
            int pos = source.indexOf(delimiter, offset);
            if (pos < 0) {
                pos = length; // last segment runs to the end of the source
            }
            results[slot] = source.substring(offset, pos);
            offset = pos + 1;
        }
        return results;
    }

    /**
     * Splits the string at every non-overlapping occurrence of the separator,
     * which is matched literally. Empty segments are preserved.
     *
     * @param str String to split, may be null
     * @param separator literal separator text, must not be null
     * @return String[] the segments; {@link #EMPTY_STRINGS} if str is null or empty,
     *         a single-element array holding str if the separator is empty or absent
     */
    public static String[] substringsOf(String str, String separator) {
        if (str == null || str.length() == 0) {
            return EMPTY_STRINGS;
        }

        // An empty separator matches everywhere without advancing the scan
        // position, which would loop until memory runs out; treat it as "no separator".
        int index = separator.length() == 0 ? -1 : str.indexOf(separator);
        if (index == -1) {
            return new String[] { str };
        }

        final List<String> list = new ArrayList<String>();
        final int len = separator.length();
        int currPos = 0;
        while (index != -1) {
            list.add(str.substring(currPos, index));
            currPos = index + len;
            index = str.indexOf(separator, currPos);
        }
        list.add(str.substring(currPos));
        return list.toArray(new String[list.size()]);
    }

    /**
     * Copies the elements returned by the iterator onto the string buffer
     * each delimited by the separator. Nothing is appended if the iterator
     * has no elements; no separator is written before the first or after the
     * last element.
     *
     * @param sb StringBuffer receiving the output
     * @param iter Iterator supplying the elements
     * @param separator String placed between consecutive elements
     */
    public static void asStringOn(StringBuffer sb, Iterator<?> iter, String separator) {
        if (!iter.hasNext()) {
            return;
        }

        sb.append(iter.next());
        while (iter.hasNext()) {
            sb.append(separator);
            sb.append(iter.next());
        }
    }

    /**
     * Return the length of the shortest string in the array.
     * If the array is null or empty, or any one of its elements is
     * null, then it returns 0.
     *
     * @param strings String[]
     * @return int
     */
    public static int lengthOfShortestIn(String[] strings) {
        if (strings == null || strings.length == 0) {
            return 0;
        }

        int minLength = Integer.MAX_VALUE;
        for (int i = 0; i < strings.length; i++) {
            final String candidate = strings[i];
            if (candidate == null) {
                return 0;
            }
            minLength = Math.min(minLength, candidate.length());
        }
        return minLength;
    }

    /**
     * Determine the maximum number of common leading whitespace characters
     * the strings share in the same sequence. Useful for determining how
     * many leading characters can be removed to shift all the text in the
     * strings to the left without misaligning them. The whitespace must be
     * identical character for character (a tab does not match a space).
     *
     * @param strings String[]
     * @return int 0 if the array is null, empty, or contains a null or empty string
     */
    public static int maxCommonLeadingWhitespaceForAll(String[] strings) {
        final int shortest = lengthOfShortestIn(strings);
        if (shortest == 0) {
            return 0;
        }

        final String first = strings[0];
        for (int m = 0; m < shortest; m++) {
            final char expected = first.charAt(m);
            if (!Character.isWhitespace(expected)) {
                return m;
            }
            // The first string supplied the expected character, so start at 1.
            for (int i = 1; i < strings.length; i++) {
                if (strings[i].charAt(m) != expected) {
                    return m;
                }
            }
        }
        return shortest;
    }

    /**
     * Trims the leading characters off the strings up to the trimDepth
     * specified. Returns the very same array if trimDepth = 0, otherwise a
     * new array; the input array is never modified.
     *
     * @param strings the strings to trim; no element may be null or shorter than trimDepth
     * @param trimDepth number of leading characters to drop from every string
     * @return String[]
     */
    public static String[] trimStartOn(String[] strings, int trimDepth) {
        if (trimDepth == 0) {
            return strings;
        }

        final String[] results = new String[strings.length];
        for (int i = 0; i < strings.length; i++) {
            results[i] = strings[i].substring(trimDepth);
        }
        return results;
    }

    /**
     * Left pads a string with spaces.
     *
     * @param s The String to pad, must not be null
     * @param length The desired minimum length of the resulting padded String
     * @return The resulting left padded String; s itself if it is already long enough
     */
    public static String lpad(String s, int length) {
        final int missing = length - s.length();
        if (missing <= 0) {
            return s;
        }
        final char[] padding = new char[missing];
        java.util.Arrays.fill(padding, ' ');
        return new StringBuilder(length).append(padding).append(s).toString();
    }

    /**
     * Are the two String values the same.
     * The Strings can be optionally trimmed before checking.
     * The Strings can be optionally compared ignoring case.
     * The Strings can have embedded whitespace standardized before comparing.
     * Two null values are treated as equal.
     *
     * @param s1 The first String.
     * @param s2 The second String.
     * @param trim Indicates if the Strings should be trimmed before comparison.
     * @param ignoreCase Indicates if the case of the Strings should ignored during comparison.
     * @param standardizeWhitespace Indicates if the embedded whitespace should be standardized before comparison.
     * @return <code>true</code> if the Strings are the same, <code>false</code> otherwise.
     */
    @SuppressWarnings("PMD.CompareObjectsWithEquals")
    public static boolean isSame(String s1, String s2, boolean trim, boolean ignoreCase, boolean standardizeWhitespace) {
        // Identity check also covers the "both null" case.
        if (s1 == s2) {
            return true;
        }
        if (s1 == null || s2 == null) {
            return false;
        }

        String left = s1;
        String right = s2;
        if (trim) {
            left = left.trim();
            right = right.trim();
        }
        if (standardizeWhitespace) {
            // Replace each run of whitespace with a standard single space character.
            left = left.replaceAll("\\s+", " ");
            right = right.replaceAll("\\s+", " ");
        }
        return ignoreCase ? left.equalsIgnoreCase(right) : left.equals(right);
    }

    /**
     * Formats all items onto a string with separators if more than one
     * exists, return an empty string if the items are null or empty.
     * Null elements are rendered as the text "null", whatever their position.
     *
     * @param items Object[]
     * @param separator String
     * @return String
     */
    public static String asString(Object[] items, String separator) {
        if (items == null || items.length == 0) {
            return "";
        }

        // String.valueOf keeps a null first element consistent with how
        // StringBuilder.append(Object) renders null elements later on.
        final StringBuilder sb = new StringBuilder(String.valueOf(items[0]));
        for (int i = 1; i < items.length; i++) {
            sb.append(separator).append(items[i]);
        }
        return sb.toString();
    }
}
|