01 /**
02 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
03 */
04 package net.sourceforge.pmd.cpd;
05
06 import java.io.BufferedReader;
07 import java.io.CharArrayReader;
08 import java.util.NoSuchElementException;
09 import java.util.StringTokenizer;
10
11 /**
12 * This class does a best-guess try-anything tokenization.
13 *
14 * @author jheintz
15 */
16 public class AnyTokenizer implements Tokenizer {
17 public static final String TOKENS = " \t!#$%^&*(){}-=+<>/\\`~;:";
18
19 public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
20 StringBuffer sb = sourceCode.getCodeBuffer();
21 BufferedReader reader = new BufferedReader(new CharArrayReader(sb.toString().toCharArray()));
22 try {
23 int lineNumber = 1;
24 String line = reader.readLine();
25 while (line != null) {
26 StringTokenizer tokenizer = new StringTokenizer(line, TOKENS, true);
27 try {
28 String token = tokenizer.nextToken();
29 while (token != null) {
30 if (!token.equals(" ") && !token.equals("\t")) {
31 tokenEntries.add(new TokenEntry(token, sourceCode.getFileName(), lineNumber));
32 }
33 token = tokenizer.nextToken();
34 }
35 } catch (NoSuchElementException ex) {
36 // done with tokens
37 }
38 // advance iteration variables
39 line = reader.readLine();
40 lineNumber++;
41 }
42 } catch (Exception ex) {
43 ex.printStackTrace();
44 } finally {
45 try {
46 reader.close();
47 } catch (Exception ex) {
48 }
49 tokenEntries.add(TokenEntry.getEOF());
50 }
51 }
52 }
|