001 /**
002 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
003 */
004 package net.sourceforge.pmd.cpd;
005
006 import java.io.File;
007 import java.io.FileNotFoundException;
008 import java.io.IOException;
009 import java.util.HashMap;
010 import java.util.HashSet;
011 import java.util.Iterator;
012 import java.util.List;
013 import java.util.Map;
014 import java.util.Set;
015
016 import net.sourceforge.pmd.util.FileFinder;
017
018 public class CPD {
019
020 private Map<String, SourceCode> source = new HashMap<String, SourceCode>();
021 private CPDListener listener = new CPDNullListener();
022 private Tokens tokens = new Tokens();
023 private int minimumTileSize;
024 private MatchAlgorithm matchAlgorithm;
025 private Language language;
026 private boolean skipDuplicates;
027 public static boolean debugEnable = false;
028 private String encoding = System.getProperty("file.encoding");
029
030
031 public CPD(int minimumTileSize, Language language) {
032 this.minimumTileSize = minimumTileSize;
033 this.language = language;
034 }
035
036 public void skipDuplicates() {
037 this.skipDuplicates = true;
038 }
039
040 public void setCpdListener(CPDListener cpdListener) {
041 this.listener = cpdListener;
042 }
043
044 public void setEncoding(String encoding) {
045 this.encoding = encoding;
046 }
047
048 public void go() {
049 TokenEntry.clearImages();
050 matchAlgorithm = new MatchAlgorithm(source, tokens, minimumTileSize, listener);
051 matchAlgorithm.findMatches();
052 }
053
054 public Iterator<Match> getMatches() {
055 return matchAlgorithm.matches();
056 }
057
058 public void add(File file) throws IOException {
059 add(1, file);
060 }
061
062 public void addAllInDirectory(String dir) throws IOException {
063 addDirectory(dir, false);
064 }
065
066 public void addRecursively(String dir) throws IOException {
067 addDirectory(dir, true);
068 }
069
070 public void add(List<File> files) throws IOException {
071 for (File f: files) {
072 add(files.size(), f);
073 }
074 }
075
076 private void addDirectory(String dir, boolean recurse) throws IOException {
077 if (!(new File(dir)).exists()) {
078 throw new FileNotFoundException("Couldn't find directory " + dir);
079 }
080 FileFinder finder = new FileFinder();
081 // TODO - could use SourceFileSelector here
082 add(finder.findFilesFrom(dir, language.getFileFilter(), recurse));
083 }
084
085 private Set<String> current = new HashSet<String>();
086
087 private void add(int fileCount, File file) throws IOException {
088
089 if (skipDuplicates) {
090 // TODO refactor this thing into a separate class
091 String signature = file.getName() + '_' + file.length();
092 if (current.contains(signature)) {
093 System.err.println("Skipping " + file.getAbsolutePath() + " since it appears to be a duplicate file and --skip-duplicate-files is set");
094 return;
095 }
096 current.add(signature);
097 }
098
099 if (!file.getCanonicalPath().equals(new File(file.getAbsolutePath()).getCanonicalPath())) {
100 System.err.println("Skipping " + file + " since it appears to be a symlink");
101 return;
102 }
103
104 listener.addedFile(fileCount, file);
105 SourceCode sourceCode = new SourceCode(new SourceCode.FileCodeLoader(file, encoding));
106 language.getTokenizer().tokenize(sourceCode, tokens);
107 source.put(sourceCode.getFileName(), sourceCode);
108 }
109
110 public static Renderer getRendererFromString(String name, String encoding) {
111 if (name.equalsIgnoreCase("text") || name.equals("")) {
112 return new SimpleRenderer();
113 } else if ("xml".equals(name)) {
114 return new XMLRenderer(encoding);
115 } else if ("csv".equals(name)) {
116 return new CSVRenderer();
117 } else if ("vs".equals(name)) {
118 return new VSRenderer();
119 }
120 try {
121 return (Renderer) Class.forName(name).newInstance();
122 } catch (Exception e) {
123 System.out.println("Can't find class '" + name + "', defaulting to SimpleRenderer.");
124 }
125 return new SimpleRenderer();
126 }
127
128 private static boolean findBooleanSwitch(String[] args, String name) {
129 for (int i = 0; i < args.length; i++) {
130 if (args[i].equals(name)) {
131 return true;
132 }
133 }
134 return false;
135 }
136
137 private static String findRequiredStringValue(String[] args, String name) {
138 for (int i = 0; i < args.length; i++) {
139 if (args[i].equals(name)) {
140 return args[i + 1];
141 }
142 }
143 System.out.println("No " + name + " value passed in");
144 usage();
145 throw new RuntimeException();
146 }
147
148 private static String findOptionalStringValue(String[] args, String name, String defaultValue) {
149 for (int i = 0; i < args.length; i++) {
150 if (args[i].equals(name)) {
151 return args[i + 1];
152 }
153 }
154 return defaultValue;
155 }
156
157 public static void main(String[] args) {
158 if (args.length == 0) {
159 usage();
160 }
161
162 try {
163 boolean skipDuplicateFiles = findBooleanSwitch(args, "--skip-duplicate-files");
164 String languageString = findOptionalStringValue(args, "--language", "java");
165 String formatString = findOptionalStringValue(args, "--format", "text");
166 String encodingString = findOptionalStringValue(args, "--encoding", System.getProperty("file.encoding"));
167 int minimumTokens = Integer.parseInt(findRequiredStringValue(args, "--minimum-tokens"));
168 LanguageFactory f = new LanguageFactory();
169 Language language = f.createLanguage(languageString);
170 Renderer renderer = CPD.getRendererFromString(formatString, encodingString);
171 CPD cpd = new CPD(minimumTokens, language);
172 cpd.setEncoding(encodingString);
173 if (skipDuplicateFiles) {
174 cpd.skipDuplicates();
175 }
176 /* FIXME: Improve this !!! */
177 boolean missingFiles = true;
178 for (int position = 0; position < args.length; position++) {
179 if (args[position].equals("--files")) {
180 cpd.addRecursively(args[position + 1]);
181 if ( missingFiles ) {
182 missingFiles = false;
183 }
184 }
185 }
186
187 if ( missingFiles ) {
188 System.out.println("No " + "--files" + " value passed in");
189 usage();
190 throw new RuntimeException();
191 }
192
193 cpd.go();
194 System.out.println(renderer.render(cpd.getMatches()));
195 } catch (Exception e) {
196 e.printStackTrace();
197 }
198 }
199
200 private static void usage() {
201 System.out.println("Usage:");
202 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens xxx --files xxx [--language xxx] [--encoding xxx] [--format (xml|text|csv|vs)] [--skip-duplicate-files] ");
203 System.out.println("i.e: ");
204 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files c:\\jdk14\\src\\java ");
205 System.out.println("or: ");
206 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files /path/to/c/code --language c ");
207 System.out.println("or: ");
208 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --encoding UTF-16LE --files /path/to/java/code --format xml");
209 }
210
211 }
|