001 /**
002 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
003 */
004 package net.sourceforge.pmd.cpd;
005
006 import java.util.ArrayList;
007 import java.util.Collections;
008 import java.util.HashMap;
009 import java.util.Iterator;
010 import java.util.List;
011 import java.util.Map;
012
013 public class MatchAlgorithm {
014
015 private final static int MOD = 37;
016 private int lastHash;
017 private int lastMod = 1;
018
019 private List<Match> matches;
020 private Map<String, SourceCode> source;
021 private Tokens tokens;
022 private List<TokenEntry> code;
023 private CPDListener cpdListener;
024 private int min;
025
026 public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min) {
027 this(sourceCode, tokens, min, new CPDNullListener());
028 }
029
030 public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min, CPDListener listener) {
031 this.source = sourceCode;
032 this.tokens = tokens;
033 this.code = tokens.getTokens();
034 this.min = min;
035 this.cpdListener = listener;
036 for (int i = 0; i < min; i++) {
037 lastMod *= MOD;
038 }
039 }
040
041 public void setListener(CPDListener listener) {
042 this.cpdListener = listener;
043 }
044
045 public Iterator<Match> matches() {
046 return matches.iterator();
047 }
048
049 public TokenEntry tokenAt(int offset, TokenEntry m) {
050 return code.get(offset + m.getIndex());
051 }
052
053 public int getMinimumTileSize() {
054 return this.min;
055 }
056
057 public void findMatches() {
058 cpdListener.phaseUpdate(CPDListener.HASH);
059 Map<TokenEntry, Object> markGroups = hash();
060
061 cpdListener.phaseUpdate(CPDListener.MATCH);
062 MatchCollector matchCollector = new MatchCollector(this);
063 for (Iterator<Object> i = markGroups.values().iterator(); i.hasNext();) {
064 Object o = i.next();
065 if (o instanceof List) {
066 List<TokenEntry> l = (List<TokenEntry>) o;
067
068 Collections.reverse(l);
069 matchCollector.collect(l);
070 }
071 i.remove();
072 }
073 cpdListener.phaseUpdate(CPDListener.GROUPING);
074 matches = matchCollector.getMatches();
075 matchCollector = null;
076 for (Match match: matches) {
077 for (Iterator<TokenEntry> occurrences = match.iterator(); occurrences.hasNext();) {
078 TokenEntry mark = occurrences.next();
079 match.setLineCount(tokens.getLineCount(mark, match));
080 if (!occurrences.hasNext()) {
081 int start = mark.getBeginLine();
082 int end = start + match.getLineCount() - 1;
083 SourceCode sourceCode = source.get(mark.getTokenSrcID());
084 match.setSourceCodeSlice(sourceCode.getSlice(start, end));
085 }
086 }
087 }
088 cpdListener.phaseUpdate(CPDListener.DONE);
089 }
090
091 @SuppressWarnings("PMD.JumbledIncrementer")
092 private Map<TokenEntry, Object> hash() {
093 Map<TokenEntry, Object> markGroups = new HashMap<TokenEntry, Object>(tokens.size());
094 for (int i = code.size() - 1; i >= 0; i--) {
095 TokenEntry token = code.get(i);
096 if (token != TokenEntry.EOF) {
097 int last = tokenAt(min, token).getIdentifier();
098 lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last;
099 token.setHashCode(lastHash);
100 Object o = markGroups.get(token);
101
102 // Note that this insertion method is worthwhile since the vast majority
103 // markGroup keys will have only one value.
104 if (o == null) {
105 markGroups.put(token, token);
106 } else if (o instanceof TokenEntry) {
107 List<TokenEntry> l = new ArrayList<TokenEntry>();
108 l.add((TokenEntry) o);
109 l.add(token);
110 markGroups.put(token, l);
111 } else {
112 List<TokenEntry> l = (List<TokenEntry>) o;
113 l.add(token);
114 }
115 } else {
116 lastHash = 0;
117 for (int end = Math.max(0, i - min + 1); i > end; i--) {
118 token = code.get(i - 1);
119 lastHash = MOD * lastHash + token.getIdentifier();
120 if (token == TokenEntry.EOF) {
121 break;
122 }
123 }
124 }
125 }
126 return markGroups;
127 }
128 }
|