MatchAlgorithm.java


001 /**

002  * BSD-style license; for more info see http://pmd.sourceforge.net/license.html

003  */

004 package net.sourceforge.pmd.cpd;

005 

006 import java.util.ArrayList;

007 import java.util.Collections;

008 import java.util.HashMap;

009 import java.util.Iterator;

010 import java.util.List;

011 import java.util.Map;

012 

013 public class MatchAlgorithm {

014 

015     private final static int MOD = 37;

016     private int lastHash;

017     private int lastMod = 1;

018 

019     private List<Match> matches;

020     private Map<String, SourceCode> source;

021     private Tokens tokens;

022     private List<TokenEntry> code;

023     private CPDListener cpdListener;

024     private int min;

025 

026     public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min) {

027         this(sourceCode, tokens, min, new CPDNullListener());

028     }

029 

030     public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min, CPDListener listener) {

031         this.source = sourceCode;

032         this.tokens = tokens;

033         this.code = tokens.getTokens();

034         this.min = min;

035         this.cpdListener = listener;

036         for (int i = 0; i < min; i++) {

037             lastMod *= MOD;

038         }

039     }

040 

041     public void setListener(CPDListener listener) {

042         this.cpdListener = listener;

043     }

044 

045     public Iterator<Match> matches() {

046         return matches.iterator();

047     }

048 

049     public TokenEntry tokenAt(int offset, TokenEntry m) {

050         return code.get(offset + m.getIndex());

051     }

052 

053     public int getMinimumTileSize() {

054         return this.min;

055     }

056 

057     public void findMatches() {

058         cpdListener.phaseUpdate(CPDListener.HASH);

059         Map<TokenEntry, Object> markGroups = hash();

060 

061         cpdListener.phaseUpdate(CPDListener.MATCH);

062         MatchCollector matchCollector = new MatchCollector(this);

063         for (Iterator<Object> i = markGroups.values().iterator(); i.hasNext();) {

064             Object o = i.next();

065             if (o instanceof List) {

066                 List<TokenEntry> l = (List<TokenEntry>) o;

067 

068                 Collections.reverse(l);

069                 matchCollector.collect(l);

070             }

071             i.remove();

072         }

073         cpdListener.phaseUpdate(CPDListener.GROUPING);

074         matches = matchCollector.getMatches();

075         matchCollector = null;

076         for (Match match: matches) {

077             for (Iterator<TokenEntry> occurrences = match.iterator(); occurrences.hasNext();) {

078                 TokenEntry mark = occurrences.next();

079                 match.setLineCount(tokens.getLineCount(mark, match));

080                 if (!occurrences.hasNext()) {

081                     int start = mark.getBeginLine();

082                     int end = start + match.getLineCount() - 1;

083                     SourceCode sourceCode = source.get(mark.getTokenSrcID());

084                     match.setSourceCodeSlice(sourceCode.getSlice(start, end));

085                 }

086             }

087         }

088         cpdListener.phaseUpdate(CPDListener.DONE);

089     }

090 

091     @SuppressWarnings("PMD.JumbledIncrementer")

092     private Map<TokenEntry, Object> hash() {

093         Map<TokenEntry, Object> markGroups = new HashMap<TokenEntry, Object>(tokens.size());

094         for (int i = code.size() - 1; i >= 0; i--) {

095             TokenEntry token = code.get(i);

096             if (token != TokenEntry.EOF) {

097                 int last = tokenAt(min, token).getIdentifier();

098                 lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last;

099                 token.setHashCode(lastHash);

100                 Object o = markGroups.get(token);

101 

102                 // Note that this insertion method is worthwhile since the vast majority

103                 // markGroup keys will have only one value.

104                 if (o == null) {

105                     markGroups.put(token, token);

106                 } else if (o instanceof TokenEntry) {

107                     List<TokenEntry> l = new ArrayList<TokenEntry>();

108                     l.add((TokenEntry) o);

109                     l.add(token);

110                     markGroups.put(token, l);

111                 } else {

112                     List<TokenEntry> l = (List<TokenEntry>) o;

113                     l.add(token);

114                 }

115             } else {

116                 lastHash = 0;

117                 for (int end = Math.max(0, i - min + 1); i > end; i--) {

118                     token = code.get(i - 1);

119                     lastHash = MOD * lastHash + token.getIdentifier();

120                     if (token == TokenEntry.EOF) {

121                         break;

122                     }

123                 }

124             }

125         }

126         return markGroups;

127     }

128 }