# create_raw_dataset.ps1
# (Web-page chrome and the line-number gutter left over from the GitHub file view were removed here.)
# ---------------------------------------------------------------------------
# Tooling: MSBuild from a Visual Studio 2019 Community installation.
# ---------------------------------------------------------------------------
$MS_BUILD_PATH = 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\MSBuild\Current\Bin'
$MSBUILD_EXE = '{0}\MSBuild.exe' -f $MS_BUILD_PATH

# ---------------------------------------------------------------------------
# Locations. Every output folder lives under <start directory>/raw_dataset.
# ---------------------------------------------------------------------------
# Directory the script was launched from.
$CURRENT_DIR = $PWD

# Folder (relative to the start directory) that receives the repositories.
$SUBMODULE_REPOS_DIR = "./submodule_repos_to_analyze"

# Analysis output, plus the sibling folder used for "empty" results.
$OUTPUT_DIR_ANALYSIS = '{0}/raw_dataset/analysis_files' -f $CURRENT_DIR
$OUTPUT_DIR_ANALYSIS_EMPTY = '{0}/raw_dataset/analysis_files_empty' -f $CURRENT_DIR

# Diff output, plus the sibling folder used for "empty" results.
$OUTPUT_DIR_FIX = '{0}/raw_dataset/diffs' -f $CURRENT_DIR
$OUTPUT_DIR_FIX_EMPTY = '{0}/raw_dataset/diffs_empty' -f $CURRENT_DIR

# Per-repository timing reports.
$OUTPUT_DIR_TIMINGS = '{0}/raw_dataset/timings' -f $CURRENT_DIR
# Ensure all output folders are present before any work starts.
# The return value of CreateDirectory is discarded so nothing is written to the output stream.
$OUTPUT_DIR_ANALYSIS,
$OUTPUT_DIR_ANALYSIS_EMPTY,
$OUTPUT_DIR_FIX,
$OUTPUT_DIR_FIX_EMPTY,
$OUTPUT_DIR_TIMINGS |
    ForEach-Object -Process {
        [void][System.IO.Directory]::CreateDirectory($PSItem)
    }
# Dot-source the helper script so its definitions are available in this session.
. ./create_raw_dataset_functions.ps1

# Build one record per entry found under nuget_analyzer_packages:
#   NugetFullname - the entry's name
#   NugetPath     - the entry's full path
$ANALYZER_PACKAGES = Get-ChildItem -Path nuget_analyzer_packages/* |
    ForEach-Object -Process {
        @{
            NugetFullname = $PSItem.Name
            NugetPath     = $PSItem.FullName
        }
    }
# To try a single package, replace the assignment above with a hand-written record, e.g.:
# $ANALYZER_PACKAGES = @(@{ NugetFullname = "Agoda.Analyzers.1.0.517"; NugetPath = "C:\Users\vlohse\Desktop\neural-repair-static-analysis\nuget_analyzer_packages\Agoda.Analyzers.1.0.517" })
Write-Output "Loaded ANALYZER_PACKAGES"

# Per-package details table; later handed to RunAndSaveFix.
$ANALYZER_PACKAGE_DETAILS = Import-Csv -Path "analyzer_package_details_filtered.csv"
Write-Output "Loaded ANALYZER_PACKAGE_DETAILS"

# Names of the packages that should actually be run (one per line in the text file).
[string[]]$RELEVANT_ANALYZER_PACKAGES = Get-Content -Path ./nuget_packages_relevant_sources.txt

# Repository list; rows expose the RepoName and RepoURL columns used by the main loop.
$GH_REPOS = Import-Csv -Path "github_repos.csv"
# Main loop: process every repository from $GH_REPOS, up to 10 at a time.
#
# For each repository this block
#   1. adds it as a git submodule under $SUBMODULE_REPOS_DIR,
#   2. reads the hash of its most recent commit,
#   3. restores every solution returned by GetAllRepoSolutions with MSBuild,
#   4. runs ApplyRoslynatorAnalysis once per relevant analyzer package,
#   5. calls RunAndSaveFix once per diagnostic id listed in the analysis XML,
#   6. writes the elapsed minutes and solution count to the timings folder.
#
# Variables from the enclosing script are read through the $Using: scope
# modifier because the -Parallel script block runs in separate runspaces.
#
# NOTE(review): `git submodule add` is executed concurrently against the same
# parent repository. Concurrent runs presumably contend for the parent index
# and .gitmodules - confirm whether a failed registration matters here. The
# repository folder check below only guarantees that the clone itself exists.
$GH_REPOS | ForEach-Object -ThrottleLimit 10 -Parallel {
    $REPO_NAME = $_.RepoName
    $REPO_URL = $_.RepoURL
    $REPO_PATH = "$Using:SUBMODULE_REPOS_DIR/$REPO_NAME"

    # Local copies of values that are needed in a form $Using: cannot provide:
    # the call operator needs a plain variable, and .Contains() cannot be
    # invoked on a $Using: expression directly.
    $MSBUILD = $Using:MSBUILD_EXE
    $RELEVANT_ANALYZER_PACKAGES_COPY = $Using:RELEVANT_ANALYZER_PACKAGES

    # Pin the working directory so the relative repository path and the
    # dot-sourced helper script below resolve from the start directory.
    Set-Location "$Using:CURRENT_DIR"

    Write-Output "Adding submodule: $REPO_NAME"
    git submodule add $REPO_URL $REPO_PATH

    # Without a repository folder every later step would silently operate on
    # the parent repository instead, so skip this repository.
    if (-not (Test-Path -LiteralPath $REPO_PATH -PathType Container)) {
        Write-Warning "Skipping ${REPO_NAME}: '$REPO_PATH' does not exist after 'git submodule add'"
        return
    }

    # `git -C` queries the repository without changing this runspace's location.
    $LAST_COMMIT = git -C $REPO_PATH log -n 1 --pretty=format:"%H"
    if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($LAST_COMMIT)) {
        Write-Warning "Skipping ${REPO_NAME}: could not read the last commit of '$REPO_PATH'"
        return
    }
    Write-Output "Last commit: $LAST_COMMIT"
    Write-Output "REPO_PATH: $REPO_PATH"

    # Dot-source the helpers again: functions defined in the calling session
    # are not available inside the parallel runspace.
    . ./create_raw_dataset_functions.ps1

    $SOLUTION_FILES = GetAllRepoSolutions $REPO_PATH
    $NUMBER_SOLUTIONS = $SOLUTION_FILES.Count
    Write-Output "NUMBER_SOLUTIONS: $NUMBER_SOLUTIONS"

    $swTotal = [Diagnostics.Stopwatch]::StartNew()

    # Cannot apply roslynator by file; only by project/solution;
    # Might as well apply to entire solution.
    foreach ($SOLUTION_FILE in $SOLUTION_FILES) {
        $SOLUTION_FILENAME = $SOLUTION_FILE.Filename
        $SOLUTION_FILEPATH = $SOLUTION_FILE.Filepath

        # Call MSBuild directly with separate arguments. Building a command
        # string for Invoke-Expression breaks on solution paths that contain
        # spaces and would evaluate any PowerShell syntax embedded in the path.
        & $MSBUILD $SOLUTION_FILEPATH /t:Restore
        # & $MSBUILD $SOLUTION_FILEPATH /t:Clean

        Write-Output "Working with SOLUTION_FILENAME: $SOLUTION_FILENAME"

        foreach ($ANALYZER_PACKAGE in $Using:ANALYZER_PACKAGES) {
            $NUGET_FULL_NAME = $ANALYZER_PACKAGE.NugetFullname
            $NUGET_PATH = $ANALYZER_PACKAGE.NugetPath

            # Only packages named in the relevant-packages list are run
            # (.Contains() is an exact, case-sensitive match).
            if (-Not ($RELEVANT_ANALYZER_PACKAGES_COPY.Contains($NUGET_FULL_NAME))) {
                continue
            }

            Write-Output "<<<$SOLUTION_FILENAME>>> Using NuGet package: $NUGET_FULL_NAME"

            # Output name identifies repository, solution, commit and package.
            $OUTPUT_FILENAME = "${REPO_NAME}__${SOLUTION_FILENAME}__${LAST_COMMIT}__${NUGET_FULL_NAME}"
            $ANALYSIS_FILEPATH = "$Using:OUTPUT_DIR_ANALYSIS/${OUTPUT_FILENAME}.xml"
            $ANALYSIS_FILEPATH_EMPTY = "$Using:OUTPUT_DIR_ANALYSIS_EMPTY/${OUTPUT_FILENAME}.xml"

            ApplyRoslynatorAnalysis `
                $ANALYSIS_FILEPATH `
                $ANALYSIS_FILEPATH_EMPTY `
                $SOLUTION_FILEPATH `
                $NUGET_PATH

            # A file in the "empty" folder marks this combination as having no
            # usable analysis (presumably written by ApplyRoslynatorAnalysis or
            # by an earlier run - confirm against the helper script).
            if (Test-Path $ANALYSIS_FILEPATH_EMPTY) {
                Write-Output "<<<$SOLUTION_FILENAME>>> No analysis generated for $ANALYSIS_FILEPATH. Skipping fixes"
                continue
            }

            # No analysis file at all: leave a marker in the "empty" folder so
            # the check above short-circuits next time, then move on.
            if (!(Test-Path $ANALYSIS_FILEPATH)) {
                Write-Output "<<<$SOLUTION_FILENAME>>> No analysis generated for $ANALYSIS_FILEPATH. Skipping fixes"
                Out-File -FilePath $ANALYSIS_FILEPATH_EMPTY
                continue
            }

            # Get all diagnostic ids which generated a diagnostic on the solution
            [XML]$ANALYSIS_XML = Get-Content $ANALYSIS_FILEPATH
            $DIAGNOSTIC_IDS = $ANALYSIS_XML.Roslynator.CodeAnalysis.Summary.Diagnostic |
                ForEach-Object {
                    $_.Id
                }

            # Breaking down diffs into single diagnostics;
            foreach ($DIAGNOSTIC_ID in $DIAGNOSTIC_IDS) {
                RunAndSaveFix `
                    $REPO_PATH `
                    $SOLUTION_FILEPATH `
                    $NUGET_FULL_NAME `
                    $NUGET_PATH `
                    $OUTPUT_FILENAME `
                    $Using:OUTPUT_DIR_FIX `
                    $Using:OUTPUT_DIR_FIX_EMPTY `
                    $Using:ANALYZER_PACKAGE_DETAILS `
                    $DIAGNOSTIC_ID
                # break
            }
            # break
        }
        # break
    }

    $swTotal.Stop()
    $ELAPSED_MINUTES = $swTotal.Elapsed.TotalMinutes
    $RESULT = "ELAPSED_MINUTES: $ELAPSED_MINUTES, NUMBER_SOLUTIONS: $NUMBER_SOLUTIONS"
    $TIMER_RESULTS_PATH = "$Using:OUTPUT_DIR_TIMINGS/ALL_SOLUTIONS__${REPO_NAME}.txt"

    # The redirection creates the report when it is missing and overwrites it
    # otherwise, so no separate file creation step is needed.
    $RESULT > $TIMER_RESULTS_PATH
    # break
}