View Javadoc
1 /*** 2 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html 3 */ 4 package net.sourceforge.pmd.cpd; 5 6 import java.util.ArrayList; 7 import java.util.Collections; 8 import java.util.HashMap; 9 import java.util.HashSet; 10 import java.util.Iterator; 11 import java.util.List; 12 import java.util.Map; 13 import java.util.Set; 14 15 public class MatchCollector { 16 17 private MatchAlgorithm ma; 18 private Map startMap = new HashMap(); 19 private Map fileMap = new HashMap(); 20 21 public MatchCollector(MatchAlgorithm ma) { 22 this.ma = ma; 23 } 24 25 public void collect(int minimumLength, List marks) { 26 //first get a pairwise collection of all maximal matches 27 for (int i = 0; i < marks.size() - 1; i++) { 28 TokenEntry mark1 = (TokenEntry) marks.get(i); 29 for (int j = i + 1; j < marks.size(); j++) { 30 TokenEntry mark2 = (TokenEntry) marks.get(j); 31 int diff = mark1.getIndex() - mark2.getIndex(); 32 if (-diff < minimumLength) { 33 continue; 34 } 35 if (hasPreviousDupe(mark1, mark2)) { 36 continue; 37 } 38 int dupes = countDuplicateTokens(mark1, mark2); 39 //false positive check 40 if (dupes < minimumLength) { 41 continue; 42 } 43 //is it still too close together 44 if (diff + dupes >= 1) { 45 continue; 46 } 47 determineMatch(mark1, mark2, dupes); 48 } 49 } 50 } 51 52 public List getMatches() { 53 ArrayList matchList = new ArrayList(startMap.values()); 54 groupMatches(matchList); 55 return matchList; 56 } 57 58 /*** 59 * A greedy algorithm for determining non-overlapping matches 60 */ 61 private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) { 62 Match match = new Match(dupes, mark1, mark2); 63 String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID(); 64 ArrayList pairMatches = (ArrayList) fileMap.get(fileKey); 65 if (pairMatches == null) { 66 pairMatches = new ArrayList(); 67 fileMap.put(fileKey, pairMatches); 68 } 69 boolean add = true; 70 for (int k = 0; k < pairMatches.size(); k++) { 71 Match other = (Match) pairMatches.get(k); 72 if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex() 73 > 0) { 74 boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0; 75 if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0)) 76 || (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) { 77 if (other.getTokenCount() >= match.getTokenCount()) { 78 add = false; 79 break; 80 } else { 81 pairMatches.remove(k); 82 startMap.remove(other.getMatchCode()); 83 } 84 } 85 } 86 } 87 if (add) { 88 pairMatches.add(match); 89 startMap.put(match.getMatchCode(), match); 90 } 91 } 92 93 private void groupMatches(ArrayList matchList) { 94 Collections.sort(matchList); 95 HashSet matchSet = new HashSet(); 96 Match.MatchCode matchCode = new Match.MatchCode(); 97 for (int i = matchList.size(); i > 1; i--) { 98 Match match1 = (Match) matchList.get(i - 1); 99 TokenEntry mark1 = (TokenEntry) match1.getMarkSet().iterator().next(); 100 matchSet.clear(); 101 matchSet.add(match1.getMatchCode()); 102 for (int j = i - 1; j > 0; j--) { 103 Match match2 = (Match) matchList.get(j - 1); 104 if (match1.getTokenCount() != match2.getTokenCount()) { 105 break; 106 } 107 TokenEntry mark2 = null; 108 for (Iterator iter = match2.getMarkSet().iterator(); iter.hasNext();) { 109 mark2 = (TokenEntry) iter.next(); 110 if (mark2 != mark1) { 111 break; 112 } 113 } 114 int dupes = countDuplicateTokens(mark1, mark2); 115 if (dupes < match1.getTokenCount()) { 116 break; 117 } 118 matchSet.add(match2.getMatchCode()); 119 match1.getMarkSet().addAll(match2.getMarkSet()); 120 matchList.remove(i - 2); 121 i--; 122 } 123 if (matchSet.size() == 1) { 124 continue; 125 } 126 //prune the mark set 127 Set pruned = match1.getMarkSet(); 128 boolean done = false; 129 ArrayList a1 = new ArrayList(match1.getMarkSet()); 130 Collections.sort(a1); 131 for (int outer = 0; outer < a1.size() - 1 && !done; outer++) { 132 TokenEntry cmark1 = (TokenEntry) a1.get(outer); 133 for (int inner = outer + 1; inner < a1.size() && !done; inner++) { 134 TokenEntry cmark2 = (TokenEntry) a1.get(inner); 135 matchCode.setFirst(cmark1.getIndex()); 136 matchCode.setSecond(cmark2.getIndex()); 137 if (!matchSet.contains(matchCode)) { 138 if (pruned.size() > 2) { 139 pruned.remove(cmark2); 140 } 141 if (pruned.size() == 2) { 142 done = true; 143 } 144 } 145 } 146 } 147 } 148 } 149 150 private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) { 151 if (mark1.getIndex() == 0) { 152 return false; 153 } 154 return !matchEnded(ma.tokenAt(-1, mark1), ma.tokenAt(-1, mark2)); 155 } 156 157 private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) { 158 int index = 0; 159 while (!matchEnded(ma.tokenAt(index, mark1), ma.tokenAt(index, mark2))) { 160 index++; 161 } 162 return index; 163 } 164 165 private boolean matchEnded(TokenEntry token1, TokenEntry token2) { 166 return token1.getIdentifier() != token2.getIdentifier() || token1 == TokenEntry.EOF || token2 == TokenEntry.EOF; 167 } 168 }

This page was automatically generated by Maven