diff --git a/.classpath b/.classpath index f358e62..736f5a7 100644 --- a/.classpath +++ b/.classpath @@ -13,20 +13,7 @@ - - - - - - - - - - - - - - + diff --git a/src/main/java/hgu/csee/isel/alinew/szz/AGSZZ.java b/src/main/java/hgu/csee/isel/alinew/szz/AGSZZ.java index 465d16f..73e0177 100644 --- a/src/main/java/hgu/csee/isel/alinew/szz/AGSZZ.java +++ b/src/main/java/hgu/csee/isel/alinew/szz/AGSZZ.java @@ -3,9 +3,7 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; -import java.util.Set; import org.apache.commons.io.FileUtils; import org.eclipse.jgit.api.Git; @@ -13,7 +11,6 @@ import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.RevCommit; -import hgu.csee.isel.alinew.szz.exception.EmptyHunkTypeException; import hgu.csee.isel.alinew.szz.graph.AnnotationGraphBuilder; import hgu.csee.isel.alinew.szz.graph.AnnotationGraphModel; import hgu.csee.isel.alinew.szz.model.Line; @@ -27,6 +24,7 @@ public class AGSZZ { private List BFCList = new ArrayList<>(); private boolean debug; private File localPath; + public static AnnotationGraphModel agm = new AnnotationGraphModel(); public AGSZZ(String gIT_URL, List bFCList, boolean debug) { this.GIT_URL = gIT_URL; @@ -38,55 +36,55 @@ public void run() throws IOException { try { // Clone final String REMOTE_URI = GIT_URL + ".git"; - + // prepare a new folder for the cloned repository localPath = File.createTempFile("TestGitRepository", ""); if (!localPath.delete()) { throw new IOException("Could not delete temporary file " + localPath); } - + System.out.println("\nCloning from " + REMOTE_URI + " to " + localPath); - Git git = Git.cloneRepository() - .setURI(REMOTE_URI) - .setDirectory(localPath) - .call(); + Git git = Git.cloneRepository().setURI(REMOTE_URI).setDirectory(localPath).call(); System.out.println("Having repository: " + git.getRepository().getDirectory()); Repository repo = git.getRepository(); - + List revs = GitUtils.getRevs(git); - + List bfcList = GitUtils.getBFCList(BFCList, revs); - + List targetPaths = GitUtils.getTargetPaths(repo, bfcList); - - RevsWithPath revsWithPath = GitUtils.collectRevsWithSpecificPath(GitUtils.configurePathRevisionList(repo, revs), targetPaths); - + + RevsWithPath revsWithPath = GitUtils + .collectRevsWithSpecificPath(GitUtils.configurePathRevisionList(repo, revs), targetPaths); + // Phase 1 : Build the annotation graph final long startBuildingTime = System.currentTimeMillis(); AnnotationGraphBuilder agb = new AnnotationGraphBuilder(); - AnnotationGraphModel agm = agb.buildAnnotationGraph(repo, revsWithPath, debug); + agb.buildAnnotationGraph(repo, revsWithPath, debug); final long endBuildingTime = System.currentTimeMillis(); System.out.println("\nBuilding Annotation Graph takes " + (endBuildingTime - startBuildingTime) / 1000.0 + "s\n"); - // Phase 2 : Trace and collect BIC candidates and filter out format changes, comments, etc among candidates + // Phase 2 : Trace and collect BIC candidates and filter out format changes, + // comments, etc among candidates final long startTracingTime = System.currentTimeMillis(); - + Tracer tracer = new Tracer(); - //List BILines = tracer.collectBILines(repo, revs, agm, revsWithPath, BFCList, debug); + // List BILines = tracer.collectBILines(repo, revs, agm, revsWithPath, + // BFCList, debug); List BILines = tracer.collectBILines(repo, bfcList, agm, revsWithPath, debug); - + final long endTracingTime = System.currentTimeMillis(); System.out.println("\nCollecting BICs takes " + (endTracingTime - startTracingTime) / 1000.0 + "s\n"); - + // Phase 3 : store outputs Utils.storeOutputFile(GIT_URL, BILines); - } catch (IOException | GitAPIException | EmptyHunkTypeException e) { + } catch (IOException | GitAPIException e) { e.printStackTrace(); } finally { // clean up here to not keep using more and more disk-space for these samples diff --git a/src/main/java/hgu/csee/isel/alinew/szz/graph/AnnotationGraphBuilder.java b/src/main/java/hgu/csee/isel/alinew/szz/graph/AnnotationGraphBuilder.java index 7d58922..1f91cb2 100644 --- a/src/main/java/hgu/csee/isel/alinew/szz/graph/AnnotationGraphBuilder.java +++ b/src/main/java/hgu/csee/isel/alinew/szz/graph/AnnotationGraphBuilder.java @@ -8,11 +8,13 @@ import org.eclipse.jgit.diff.Edit; import org.eclipse.jgit.diff.EditList; +import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.RevCommit; import com.google.common.collect.Iterators; +import hgu.csee.isel.alinew.szz.AGSZZ; import hgu.csee.isel.alinew.szz.exception.EmptyHunkTypeException; import hgu.csee.isel.alinew.szz.model.Hunk; import hgu.csee.isel.alinew.szz.model.Line; @@ -21,234 +23,294 @@ import hgu.csee.isel.alinew.szz.util.GitUtils; import hgu.csee.isel.alinew.szz.util.Utils; -public class AnnotationGraphBuilder { +public class AnnotationGraphBuilder extends Thread { - public AnnotationGraphModel buildAnnotationGraph(Repository repo, RevsWithPath revsWithPath, boolean debug) - throws IOException, EmptyHunkTypeException { - // Generate Annotation Graph - AnnotationGraphModel annotationGraph = new AnnotationGraphModel(); + private final int THREADS = 3; + Repository repo; + RevsWithPath revsWithPath; + List list = new ArrayList<>(); + boolean debug; + Iterator paths; + + public AnnotationGraphBuilder() { + + } + + public AnnotationGraphBuilder(Repository repo, RevsWithPath revsWithPath, List list, boolean debug) { + this.repo = repo; + this.revsWithPath = revsWithPath; + for (int i = 0; i < list.size(); i++) { + this.list.add(list.get(i)); + } +// this.list = list; + this.debug = debug; + } + + public void buildAnnotationGraph(Repository repo, RevsWithPath revsWithPath, boolean debug) { + + List keyList = new ArrayList(revsWithPath.keySet()); + System.out.println("list의 사이즈: " + keyList.size()); + + int arr_size; + if (keyList.size() < THREADS) { + arr_size = keyList.size(); + } else { + arr_size = keyList.size() / THREADS; + } + + AnnotationGraphBuilder agb1 = new AnnotationGraphBuilder(repo, revsWithPath, keyList.subList(0, arr_size), debug); + AnnotationGraphBuilder agb2 = new AnnotationGraphBuilder(repo, revsWithPath, keyList.subList(arr_size, 2 * arr_size), debug); + AnnotationGraphBuilder agb3 = new AnnotationGraphBuilder(repo, revsWithPath, keyList.subList(2 * arr_size, keyList.size()), debug); + + agb1.start(); + agb2.start(); + agb3.start(); + + try { + agb1.join(); + agb2.join(); + agb3.join(); + } catch (Exception e) { + e.printStackTrace(); + } + + // TEST +// Iterator paths = AGSZZ.agm.keySet().iterator(); +// +// while (paths.hasNext()) { +// String path = paths.next(); +// System.out.println("\tvalue: " + path); +// } +// +// System.out.println("\tannotationGraph size: " + AGSZZ.agm.size()); + } + + @Override + public void run() { + for (int i = 0; i < list.size(); i++) { + String path = list.get(i); + + try { + createAnnotationGraph(path); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (EmptyHunkTypeException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + } + + private void createAnnotationGraph(String path) throws IOException, EmptyHunkTypeException { int childIdx, hunkIdx, offset; int beginOfChild, endOfChild; Line childLine; Hunk hunk; String hunkType; - // traverse all paths in the repo - Iterator paths = revsWithPath.keySet().iterator(); - - Iterator pathsForCalculatingSize = revsWithPath.keySet().iterator(); - int numOfPaths = Iterators.size(pathsForCalculatingSize); + List revs = revsWithPath.get(path); - int pathCnt = 1; - while (paths.hasNext()) { + // Generate subAnnotationGraph + HashMap> subAnnotationGraph = new HashMap>(); - String path = paths.next(); + ArrayList parentLineList = new ArrayList<>(); + ArrayList childLineList = new ArrayList<>(); - List revs = revsWithPath.get(path); + // Logging + System.out.println("\tPresent Thread Name is " + Thread.currentThread().getName() ); + System.out.println("\tBuilding Annotation Graph of " + path); - // Generate subAnnotationGraph - HashMap> subAnnotationGraph = new HashMap>(); + for (RevCommit childRev : revs) { + // Escape from the loop when there is no parent rev anymore + if (revs.indexOf(childRev) == revs.size() - 1) + break; - ArrayList parentLineList = new ArrayList<>(); - ArrayList childLineList = new ArrayList<>(); + RevCommit parentRev = revs.get(revs.indexOf(childRev) + 1); - // Logging - System.out.println("\nIn progress (" + pathCnt + " / " + numOfPaths + ")"); - System.out.println("\tBuilding Annotation Graph of " + path); + String parentContent = Utils.removeComments(GitUtils.fetchBlob(repo, parentRev, path)).trim(); + String childContent = Utils.removeComments(GitUtils.fetchBlob(repo, childRev, path)).trim(); - for (RevCommit childRev : revs) { - // Escape from the loop when there is no parent rev anymore - if (revs.indexOf(childRev) == revs.size() - 1) - break; + if (debug) { + System.out.println("path : " + path); + System.out.println("\tparent rev : " + parentRev.getName()); + System.out.println("\tchild rev : " + childRev.getName()); + } - RevCommit parentRev = revs.get(revs.indexOf(childRev) + 1); + // get the parent line list from content + configureLineList(parentLineList, path, parentRev, parentContent); - String parentContent = Utils.removeComments(GitUtils.fetchBlob(repo, parentRev, path)).trim(); - String childContent = Utils.removeComments(GitUtils.fetchBlob(repo, childRev, path)).trim(); + // get the child line list only when initial iteration + if (revs.indexOf(childRev) == 0) + configureLineList(childLineList, path, childRev, childContent); - if (debug) { - System.out.println("path : " + path); - System.out.println("\tparent rev : " + parentRev.getName()); - System.out.println("\tchild rev : " + childRev.getName()); + if (debug) { + System.out.println("\nParent"); + for (int i = 0; i < parentLineList.size(); i++) { + System.out.println(i + "th idx : " + parentLineList.get(i).getContent()); + } + System.out.println("\nChild"); + for (int i = 0; i < childLineList.size(); i++) { + System.out.println(i + "th idx : " + childLineList.get(i).getContent()); } + } + + ArrayList hunkList = configureHunkList(GitUtils.getEditListFromDiff(parentContent, childContent)); + + // map child line with its ancestor(s) + childIdx = 0; + hunkIdx = 0; + offset = 0; - // get the parent line list from content - configureLineList(parentLineList, path, parentRev, parentContent); + while (childIdx < childLineList.size()) { - // get the child line list only when initial iteration - if (revs.indexOf(childRev) == 0) - configureLineList(childLineList, path, childRev, childContent); + childLine = childLineList.get(childIdx); if (debug) { - System.out.println("\nParent"); - for (int i = 0; i < parentLineList.size(); i++) { - System.out.println(i + "th idx : " + parentLineList.get(i).getContent()); - } - System.out.println("\nChild"); - for (int i = 0; i < childLineList.size(); i++) { - System.out.println(i + "th idx : " + childLineList.get(i).getContent()); - } + System.out.println("\nHunk Rate : " + (hunkIdx + 1) + " / " + hunkList.size()); + System.out.println("Child Index Rate : " + childIdx + " / " + (childLineList.size() - 1)); + System.out.println("Offset : " + offset); } - ArrayList hunkList = configureHunkList(GitUtils.getEditListFromDiff(parentContent, childContent)); + // Case 1 - when there is no hunk anymore + if (hunkList.size() <= hunkIdx) { + if (debug) { + System.out.println("Connected parent index : " + (childIdx + offset) + " / " + + (parentLineList.size() - 1)); + System.out.println("No Hunk anymore\n"); + } + childLine.setLineType(LineType.CONTEXT); - // map child line with its ancestor(s) - childIdx = 0; - hunkIdx = 0; - offset = 0; + mapChildLineWithAncestor(childIdx, offset, parentLineList, childLine); - while (childIdx < childLineList.size()) { + childIdx++; + continue; + } - childLine = childLineList.get(childIdx); + hunk = hunkList.get(hunkIdx); + beginOfChild = hunk.getBeginOfChild(); + endOfChild = hunk.getEndOfChild(); + hunkType = hunk.getHunkType(); + if (debug) { + System.out.println("Hunk Type : " + hunk.getHunkType()); + System.out.println("bA : " + hunk.getBeginOfParent()); + System.out.println("eA : " + hunk.getEndOfParent()); + System.out.println("bB : " + hunk.getBeginOfChild()); + System.out.println("eB : " + hunk.getEndOfChild()); + } + + // Case 2 - child index is out of hunk range + if (childIdx < beginOfChild) { if (debug) { - System.out.println("\nHunk Rate : " + (hunkIdx + 1) + " / " + hunkList.size()); - System.out.println("Child Index Rate : " + childIdx + " / " + (childLineList.size() - 1)); - System.out.println("Offset : " + offset); + System.out.println("Connected parent index : " + (childIdx + offset) + " / " + + (parentLineList.size() - 1)); + System.out.println("Out of Hunk range\n"); + } + childLine.setLineType(LineType.CONTEXT); + mapChildLineWithAncestor(childIdx, offset, parentLineList, childLine); - // Case 1 - when there is no hunk anymore - if (hunkList.size() <= hunkIdx) { + } + // Case 3 - child index is in hunk range + else { + switch (hunkType) { + case "INSERT": if (debug) { - System.out.println("Connected parent index : " + (childIdx + offset) + " / " - + (parentLineList.size() - 1)); - System.out.println("No Hunk anymore\n"); + System.out.println("INSERT\n"); } - childLine.setLineType(LineType.CONTEXT); - mapChildLineWithAncestor(childIdx, offset, parentLineList, childLine); + // When childIdx is the last index in hunk, increment hunk index + if (childIdx == endOfChild - 1) + hunkIdx++; - childIdx++; - continue; - } + childLine.setLineType(LineType.INSERT); - hunk = hunkList.get(hunkIdx); - beginOfChild = hunk.getBeginOfChild(); - endOfChild = hunk.getEndOfChild(); - hunkType = hunk.getHunkType(); + offset--; - if (debug) { - System.out.println("Hunk Type : " + hunk.getHunkType()); - System.out.println("bA : " + hunk.getBeginOfParent()); - System.out.println("eA : " + hunk.getEndOfParent()); - System.out.println("bB : " + hunk.getBeginOfChild()); - System.out.println("eB : " + hunk.getEndOfChild()); - } + break; - // Case 2 - child index is out of hunk range - if (childIdx < beginOfChild) { + case "REPLACE": if (debug) { - System.out.println("Connected parent index : " + (childIdx + offset) + " / " - + (parentLineList.size() - 1)); - System.out.println("Out of Hunk range\n"); + System.out.println("REPLACE\n"); + } + // When childIdx is the last index in hunk, update offset and increment hunk + // index + if (childIdx == endOfChild - 1) { + offset += (hunk.getRangeOfParent() - hunk.getRangeOfChild()); + hunkIdx++; } - childLine.setLineType(LineType.CONTEXT); - mapChildLineWithAncestor(childIdx, offset, parentLineList, childLine); - } - // Case 3 - child index is in hunk range - else { - switch (hunkType) { - case "INSERT": - if (debug) { - System.out.println("INSERT\n"); - } + // check whether format change happens + String mergedParentContent = Utils + .mergeLineList(parentLineList.subList(hunk.getBeginOfParent(), hunk.getEndOfParent())); + String mergedChildContent = Utils + .mergeLineList(childLineList.subList(hunk.getBeginOfChild(), hunk.getEndOfChild())); - // When childIdx is the last index in hunk, increment hunk index - if (childIdx == endOfChild - 1) - hunkIdx++; + if (mergedParentContent.equals(mergedChildContent)) + childLine.setFormatChange(true); - childLine.setLineType(LineType.INSERT); + childLine.setLineType(LineType.REPLACE); + mapChildLineWithAncestors(hunk, parentLineList, childLine); - offset--; + break; + case "DELETE": + // If the last child line is in DELETE, it maps with nothing + if (childIdx == childLineList.size() - 1) break; - case "REPLACE": - if (debug) { - System.out.println("REPLACE\n"); - } + // If the begin of child belongs to both DELETE and INSERT + if (belongsToBothDELETEAndINSERT(hunkList, hunkIdx, beginOfChild)) { + offset += hunk.getRangeOfParent() - 1; - // When childIdx is the last index in hunk, update offset and increment hunk - // index - if (childIdx == endOfChild - 1) { - offset += (hunk.getRangeOfParent() - hunk.getRangeOfChild()); + childLine.setLineType(LineType.INSERT); + hunkIdx++; - hunkIdx++; + if (debug) { + System.out.println("INSERT\n"); } - // check whether format change happens - String mergedParentContent = Utils.mergeLineList( - parentLineList.subList(hunk.getBeginOfParent(), hunk.getEndOfParent())); - String mergedChildContent = Utils - .mergeLineList(childLineList.subList(hunk.getBeginOfChild(), hunk.getEndOfChild())); - - if (mergedParentContent.equals(mergedChildContent)) - childLine.setFormatChange(true); - - childLine.setLineType(LineType.REPLACE); - mapChildLineWithAncestors(hunk, parentLineList, childLine); - break; + } - case "DELETE": - // If the last child line is in DELETE, it maps with nothing - if (childIdx == childLineList.size() - 1) - break; - - // If the begin of child belongs to both DELETE and INSERT - if (belongsToBothDELETEAndINSERT(hunkList, hunkIdx, beginOfChild)) { - offset += hunk.getRangeOfParent() - 1; - - childLine.setLineType(LineType.INSERT); - hunkIdx++; - - if (debug) { - System.out.println("INSERT\n"); - } - - break; - } - - offset += hunk.getRangeOfParent(); + offset += hunk.getRangeOfParent(); - childLine.setLineType(LineType.CONTEXT); - mapChildLineWithAncestor(childIdx, offset, parentLineList, childLine); + childLine.setLineType(LineType.CONTEXT); + mapChildLineWithAncestor(childIdx, offset, parentLineList, childLine); - hunkIdx++; + hunkIdx++; - if (debug) { - System.out.println("Connected parent index : " + (childIdx + offset) + " / " - + (parentLineList.size() - 1)); - System.out.println("DELETE\n"); - } + if (debug) { + System.out.println("Connected parent index : " + (childIdx + offset) + " / " + + (parentLineList.size() - 1)); + System.out.println("DELETE\n"); + } - break; + break; - default: - throw new EmptyHunkTypeException(); - } + default: + System.out.println("EmptyHunkTypeException occur"); + throw new EmptyHunkTypeException(); } - - childIdx++; } - // put lists of line corresponding to commit into subAG - subAnnotationGraph.put(parentRev, parentLineList); - subAnnotationGraph.put(childRev, childLineList); - - childLineList = parentLineList; - parentLineList = new ArrayList(); + childIdx++; } - // put subAG corresponding to path into AG - annotationGraph.put(path, subAnnotationGraph); - pathCnt++; + // put lists of line corresponding to commit into subAG + subAnnotationGraph.put(parentRev, parentLineList); + subAnnotationGraph.put(childRev, childLineList); + + childLineList = parentLineList; + parentLineList = new ArrayList(); } + // put subAG corresponding to path into AG + AGSZZ.agm.put(path, subAnnotationGraph); - return annotationGraph; } private boolean belongsToBothDELETEAndINSERT(ArrayList hunkList, int currHunkIdx, int currBeginOfChild) { @@ -275,8 +337,9 @@ private void configureLineList(ArrayList lst, String path, RevCommit rev, String committer = rev.getCommitterIdent().getName(); String author = rev.getAuthorIdent().getName(); String StringDateTime = Utils.getStringDateTimeFromCommitTime(rev.getCommitTime()); - - Line line = new Line(path, rev.getName(), contentArr[i], i, LineType.CONTEXT, ancestors, false, committer, author, StringDateTime); + + Line line = new Line(path, rev.getName(), contentArr[i], i, LineType.CONTEXT, ancestors, false, committer, + author, StringDateTime); lst.add(line); }