AnyTokenizer.java
01 /**
02  * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
03  */
04 package net.sourceforge.pmd.cpd;
05 
06 import java.io.BufferedReader;
07 import java.io.CharArrayReader;
08 import java.util.NoSuchElementException;
09 import java.util.StringTokenizer;
10 
11 /**
12  * This class does a best-guess try-anything tokenization.
13  *
14  @author jheintz
15  */
16 public class AnyTokenizer implements Tokenizer {
17     public static final String TOKENS = " \t!#$%^&*(){}-=+<>/\\`~;:";
18 
19     public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
20         StringBuffer sb = sourceCode.getCodeBuffer();
21         BufferedReader reader = new BufferedReader(new CharArrayReader(sb.toString().toCharArray()));
22         try {
23             int lineNumber = 1;
24             String line = reader.readLine();
25             while (line != null) {
26                 StringTokenizer tokenizer = new StringTokenizer(line, TOKENS, true);
27                 try {
28                     String token = tokenizer.nextToken();
29                     while (token != null) {
30                         if (!token.equals(" "&& !token.equals("\t")) {
31                             tokenEntries.add(new TokenEntry(token, sourceCode.getFileName(), lineNumber));
32                         }
33                         token = tokenizer.nextToken();
34                     }
35                 catch (NoSuchElementException ex) {
36                     // done with tokens
37                 }
38                 // advance iteration variables
39                 line = reader.readLine();
40                 lineNumber++;
41             }
42         catch (Exception ex) {
43             ex.printStackTrace();
44         finally {
45             try {
46                 reader.close();
47             catch (Exception ex) {
48             }
49             tokenEntries.add(TokenEntry.getEOF());
50         }
51     }
52 }