StringUtil.java


001 /**

002  * BSD-style license; for more info see http://pmd.sourceforge.net/license.html

003  */

004 package net.sourceforge.pmd.util;

005 

006 import java.util.ArrayList;

007 import java.util.Iterator;

008 import java.util.List;

009 

/**
 * Static helpers for common String work: blank checks, replacement,
 * XML/HTML escaping, splitting, joining, padding and tolerant comparison.
 * The class only holds static members and cannot be instantiated.
 */
public final class StringUtil {

    /** Shared zero-length array returned when there is nothing to split. */
    public static final String[] EMPTY_STRINGS = new String[0];

    /** True only when the system property net.sourceforge.pmd.supportUTF8 equals "yes". */
    private static final boolean SUPPORTS_UTF8 =
            "yes".equals(System.getProperty("net.sourceforge.pmd.supportUTF8", "no"));

    /** Lowest code point that has a precomputed entry in {@link #ENTITIES}. */
    private static final int FIRST_ENTITY = 126;

    /** Highest code point that has a precomputed entry in {@link #ENTITIES}. */
    private static final int LAST_ENTITY = 255;

    /** Precomputed decimal character references, indexed by (code point - FIRST_ENTITY). */
    private static final String[] ENTITIES;

    /** Compiled once so isSame does not recompile the expression on every call. */
    private static final java.util.regex.Pattern WHITESPACE_RUN = java.util.regex.Pattern.compile("\\s+");

    static {
        ENTITIES = new String[LAST_ENTITY - FIRST_ENTITY + 1];
        for (int i = FIRST_ENTITY; i <= LAST_ENTITY; i++) {
            ENTITIES[i - FIRST_ENTITY] = "&#" + i + ';';
        }
    }

    private StringUtil() {}

    /**
     * Returns true if the value arg is either null, empty, or made up solely of
     * whitespace characters. Avoids the copy made by (string).trim().length() == 0.
     *
     * @param value the String to inspect, may be null
     * @return <code>true</code> if the value is empty, <code>false</code> otherwise.
     */
    public static boolean isEmpty(String value) {
        if (value == null) {
            return true;
        }
        // A zero-length value simply skips the loop.
        for (int i = 0; i < value.length(); i++) {
            if (!Character.isWhitespace(value.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if both strings are effectively empty (null, zero-length or
     * whitespace only, in any combination) or if they are equal according to
     * {@link String#equals(Object)}; returns false otherwise.
     *
     * @param a first String, may be null
     * @param b second String, may be null
     * @return boolean
     */
    public static boolean areSemanticEquals(String a, String b) {
        if (isEmpty(a)) {
            return isEmpty(b);
        }
        // a holds real text here, so a null or blank b can never equal it.
        return a.equals(b);
    }

    /**
     * Replaces every occurrence of a character with a String.
     *
     * @param original the String to search, must not be null
     * @param oldChar the character to replace
     * @param newString the replacement text; null is treated as the empty String
     * @return the original instance when oldChar does not occur, otherwise a new String
     * @throws NullPointerException if original is null
     */
    public static String replaceString(final String original, char oldChar, final String newString) {
        return replaceString(original, String.valueOf(oldChar), newString);
    }

    /**
     * Replaces every non-overlapping occurrence of a substring, scanning left to right.
     *
     * @param original the String to search, must not be null
     * @param oldString the text to replace, must not be null; when it is empty
     *        there is nothing to look for and original is returned unchanged
     * @param newString the replacement text; null is treated as the empty String
     * @return the original instance when oldString does not occur, otherwise a new String
     * @throws NullPointerException if oldString is null, or if original is null
     *         and oldString is not empty
     */
    public static String replaceString(final String original, final String oldString, final String newString) {
        if (oldString.length() == 0) {
            // An empty pattern matches at every index without ever advancing.
            return original;
        }
        int index = original.indexOf(oldString);
        if (index < 0) {
            return original;
        }
        final String replace = newString == null ? "" : newString;
        final StringBuilder buf = new StringBuilder(Math.max(16, original.length() + replace.length()));
        int last = 0;
        while (index != -1) {
            buf.append(original, last, index);
            buf.append(replace);
            last = index + oldString.length();
            index = original.indexOf(oldString, last);
        }
        buf.append(original, last, original.length());
        return buf.toString();
    }

    /**
     * Appends to a StringBuffer the String src where XML special chars are
     * escaped. Characters above '~' are escaped as well unless the system
     * property net.sourceforge.pmd.supportUTF8 was set to "yes" when this
     * class was initialized.
     *
     * @param buf The destination XML stream
     * @param src The String to append to the stream
     */
    public static void appendXmlEscaped(StringBuffer buf, String src) {
        appendXmlEscaped(buf, src, SUPPORTS_UTF8);
    }

    /**
     * Escapes the characters &amp;, &lt; and &gt; for inclusion in HTML text.
     * The ampersand is handled first so the entities produced for the other
     * two characters are not escaped a second time.
     *
     * @param string the text to encode, must not be null
     * @return the encoded text
     */
    public static String htmlEncode(String string) {
        String encoded = replaceString(string, '&', "&amp;");
        encoded = replaceString(encoded, '<', "&lt;");
        return replaceString(encoded, '>', "&gt;");
    }

    // TODO - unify the method above with the one below

    /**
     * Appends src to buf, replacing &amp;, &quot;, &lt; and &gt; by their named
     * entities. When supportUTF8 is false every character above '~' is written
     * as a numeric character reference: decimal up to 255, hexadecimal above.
     *
     * @param buf The destination XML stream
     * @param src The String to append to the stream
     * @param supportUTF8 true to copy characters above '~' verbatim
     */
    private static void appendXmlEscaped(StringBuffer buf, String src, boolean supportUTF8) {
        final int length = src.length();
        for (int i = 0; i < length; i++) {
            final char c = src.charAt(i);
            if (c > '~') { // 126
                if (supportUTF8) {
                    buf.append(c);
                } else if (c <= LAST_ENTITY) {
                    buf.append(ENTITIES[c - FIRST_ENTITY]);
                } else {
                    // A surrogate pair has to become one reference to the full code point.
                    final int codePoint = src.codePointAt(i);
                    if (Character.isSupplementaryCodePoint(codePoint)) {
                        i++;
                    }
                    buf.append("&#x").append(Integer.toHexString(codePoint)).append(';');
                }
            } else if (c == '&') {
                buf.append("&amp;");
            } else if (c == '"') {
                buf.append("&quot;");
            } else if (c == '<') {
                buf.append("&lt;");
            } else if (c == '>') {
                buf.append("&gt;");
            } else {
                buf.append(c);
            }
        }
    }

    /**
     * Splits the source at every occurrence of the delimiter. Empty segments
     * are kept, including a trailing one, and the delimiter is taken literally.
     *
     * @param source the text to split, may be null
     * @param delimiter the character to split on
     * @return EMPTY_STRINGS for a null or empty source, otherwise one more
     *         element than there are delimiters in the source
     */
    public static String[] substringsOf(String source, char delimiter) {

        if (source == null || source.length() == 0) {
            return EMPTY_STRINGS;
        }

        int delimiterCount = 0;
        for (int pos = source.indexOf(delimiter); pos >= 0; pos = source.indexOf(delimiter, pos + 1)) {
            delimiterCount++;
        }

        if (delimiterCount == 0) {
            return new String[] { source };
        }

        final int length = source.length();
        final String[] results = new String[delimiterCount + 1];
        int offset = 0;
        for (int i = 0; i < results.length; i++) {
            int pos = source.indexOf(delimiter, offset);
            if (pos < 0) {
                // No delimiter left: the last segment runs to the end of the source.
                pos = length;
            }
            results[i] = source.substring(offset, pos);
            offset = pos + 1;
        }

        return results;
    }

    /**
     * Splits the String at every non-overlapping occurrence of the separator.
     * Empty segments are kept, including a trailing one.
     *
     * @param str the text to split, may be null
     * @param separator the literal text to split on, must not be null; an empty
     *        separator cannot split anything, so str comes back as the only element
     * @return EMPTY_STRINGS for a null or empty str, otherwise the segments in order
     * @throws NullPointerException if separator is null and str is not empty
     */
    public static String[] substringsOf(String str, String separator) {

        if (str == null || str.length() == 0) {
            return EMPTY_STRINGS;
        }

        final int len = separator.length();
        if (len == 0) {
            // An empty separator matches at every index without ever advancing.
            return new String[] { str };
        }

        int index = str.indexOf(separator);
        if (index == -1) {
            return new String[] { str };
        }

        final List<String> list = new ArrayList<String>();
        int currPos = 0;
        while (index != -1) {
            list.add(str.substring(currPos, index));
            currPos = index + len;
            index = str.indexOf(separator, currPos);
        }
        list.add(str.substring(currPos));
        return list.toArray(new String[list.size()]);
    }

    /**
     * Copies the elements returned by the iterator onto the string buffer
     * each delimited by the separator. Nothing is appended when the iterator
     * has no elements.
     *
     * @param sb StringBuffer
     * @param iter Iterator
     * @param separator String
     */
    public static void asStringOn(StringBuffer sb, Iterator<?> iter, String separator) {

        if (!iter.hasNext()) {
            return;
        }

        sb.append(iter.next());

        while (iter.hasNext()) {
            sb.append(separator);
            sb.append(iter.next());
        }
    }

    /**
     * Return the length of the shortest string in the array.
     * If the array is null or empty, or any one of its elements is
     * null, then it returns 0.
     *
     * @param strings String[]
     * @return int
     */
    public static int lengthOfShortestIn(String[] strings) {

        if (strings == null || strings.length == 0) {
            return 0;
        }

        int minLength = Integer.MAX_VALUE;

        for (int i = 0; i < strings.length; i++) {
            if (strings[i] == null) {
                return 0;
            }
            minLength = Math.min(minLength, strings[i].length());
        }

        return minLength;
    }

    /**
     * Determine the maximum number of common leading whitespace characters
     * the strings share in the same sequence. Useful for determining how
     * many leading characters can be removed to shift all the text in the
     * strings to the left without misaligning them.
     *
     * @param strings String[]
     * @return the length of the shared whitespace prefix; 0 when the array is
     *         null or empty or contains a null or zero-length element
     */
    public static int maxCommonLeadingWhitespaceForAll(String[] strings) {

        final int shortest = lengthOfShortestIn(strings);

        for (int m = 0; m < shortest; m++) {
            final char expected = strings[0].charAt(m);
            if (!Character.isWhitespace(expected)) {
                return m;
            }
            // The first string supplied the expected character, so compare the rest.
            for (int i = 1; i < strings.length; i++) {
                if (strings[i].charAt(m) != expected) {
                    return m;
                }
            }
        }

        return shortest;
    }

    /**
     * Removes the first trimDepth characters from every string. Returns the
     * same array instance if trimDepth = 0, otherwise a new array.
     *
     * @param strings the strings to trim; every element must be at least trimDepth long
     * @param trimDepth the number of leading characters to drop
     * @return String[]
     * @throws StringIndexOutOfBoundsException if trimDepth is negative or larger
     *         than the length of one of the strings
     */
    public static String[] trimStartOn(String[] strings, int trimDepth) {

        if (trimDepth == 0) {
            return strings;
        }

        final String[] results = new String[strings.length];
        for (int i = 0; i < strings.length; i++) {
            results[i] = strings[i].substring(trimDepth);
        }
        return results;
    }

    /**
     * Left pads a string with spaces.
     *
     * @param s The String to pad
     * @param length The desired minimum length of the resulting padded String
     * @return The resulting left padded String, or s itself when it is already long enough
     */
    public static String lpad(String s, int length) {
        final int missing = length - s.length();
        if (missing <= 0) {
            return s;
        }
        final char[] padding = new char[missing];
        java.util.Arrays.fill(padding, ' ');
        return new StringBuilder(length).append(padding).append(s).toString();
    }

    /**
     * Are the two String values the same.
     * The Strings can be optionally trimmed before checking.
     * The Strings can be optionally compared ignoring case.
     * The Strings can have embedded whitespace standardized before comparing.
     * Two null values are treated as equal.
     *
     * @param s1 The first String.
     * @param s2 The second String.
     * @param trim Indicates if the Strings should be trimmed before comparison.
     * @param ignoreCase Indicates if the case of the Strings should ignored during comparison.
     * @param standardizeWhitespace Indicates if the embedded whitespace should be standardized before comparison.
     * @return <code>true</code> if the Strings are the same, <code>false</code> otherwise.
     */
    @SuppressWarnings("PMD.CompareObjectsWithEquals")
    public static boolean isSame(String s1, String s2, boolean trim, boolean ignoreCase, boolean standardizeWhitespace) {
        if (s1 == s2) {
            return true;
        }
        if (s1 == null || s2 == null) {
            return false;
        }

        String left = s1;
        String right = s2;
        if (trim) {
            left = left.trim();
            right = right.trim();
        }
        if (standardizeWhitespace) {
            // Replace every run of whitespace with a standard single space character.
            left = WHITESPACE_RUN.matcher(left).replaceAll(" ");
            right = WHITESPACE_RUN.matcher(right).replaceAll(" ");
        }
        return ignoreCase ? left.equalsIgnoreCase(right) : left.equals(right);
    }

    /**
     * Formats all items onto a string with separators if more than one
     * exists, return an empty string if the items are null or empty.
     * A null element is rendered as the text "null", whatever its position.
     *
     * @param items Object[]
     * @param separator String
     * @return String
     */
    public static String asString(Object[] items, String separator) {

        if (items == null || items.length == 0) {
            return "";
        }

        final StringBuilder sb = new StringBuilder(String.valueOf(items[0]));
        for (int i = 1; i < items.length; i++) {
            sb.append(separator).append(items[i]);
        }

        return sb.toString();
    }
}

396 }