Skip to content

Commit

Permalink
V2 rewrite
Browse files Browse the repository at this point in the history
  • Loading branch information
benedikt-schesch authored Sep 30, 2023
1 parent b654702 commit a8d61a2
Show file tree
Hide file tree
Showing 1,975 changed files with 2,806 additions and 106,762 deletions.
22 changes: 15 additions & 7 deletions .github/workflows/check-style.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,26 @@ name: Check style
on: [push, pull_request]
jobs:
style:
defaults:
run:
shell: bash -l {0}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: psf/black@stable
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
- name: Set up Python 3.8
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
python-version: '3.8'
- name: Setup miniconda
uses: conda-incubator/setup-miniconda@v2
with:
python-version: 3.8
auto-update-conda: true
channels: conda-forge,defaults
mamba-version: "*"
activate-environment: AST
environment-file: environment.yml
- name: Install shellcheck and checkbashisms
run: sudo apt install shellcheck devscripts
- name: Check style
Expand Down
23 changes: 13 additions & 10 deletions .github/workflows/small-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
defaults:
run:
shell: bash
shell: bash -l {0}
steps:
- uses: actions/setup-java@v3
with:
Expand All @@ -30,18 +30,21 @@ jobs:
- run: echo "${GITHUB_WORKSPACE}/src/scripts/merge_tools" >> $GITHUB_PATH
- run: java -version
- uses: actions/checkout@v3
- uses: fregante/setup-git-user@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
- name: Set up Python 3.8
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- run: python --version
python-version: '3.8'
- name: Setup miniconda
uses: conda-incubator/setup-miniconda@v2
with:
python-version: 3.8
auto-update-conda: true
channels: conda-forge,defaults
mamba-version: "*"
activate-environment: AST
environment-file: environment.yml
- name: Install PdfLaTeX
run: sudo apt update && sudo apt install texlive-latex-extra -y
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Install maven
uses: s4u/[email protected]
with:
Expand Down
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
.vscode
venv
**/*_explanation.txt
test_cache/
cache-small/
apache-maven-3.9.2/
results/local_repos.csv
machines.txt
.valid_merges
cache/
artifacts/
artifacts.tar.gz
*.hprof

output/
merge_repo/
Expand Down Expand Up @@ -108,3 +109,5 @@ docs/_build/

# Emacs
TAGS

!JSCover/**
17 changes: 7 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,15 @@ clean:
rm -rf repos
rm -rf scratch
rm -rf results-small
rm -rf .valid_merges_counters

# This target deletes files in the cache, which is committed to version control.
clean-cache:
rm -rf cache

# This target deletes files in the test cache.
clean-test-cache:
rm -rf test_cache
rm -rf cache-small

# This target deletes files that are committed to version control.
clean-stored-hashes:
Expand All @@ -49,7 +50,7 @@ clean-everything: clean clean-cache clean-test-cache clean-stored-hashes
# Compresses the cache.
compress-cache:
# Remove any previous archive; -f keeps the target from failing when cache.tar does not exist yet.
rm -f cache.tar
tar --exclude="*explanation.txt" -czf cache.tar cache
tar --exclude="lock" -czf cache.tar cache

# Decompresses the cache.
decompress-cache:
Expand All @@ -74,21 +75,17 @@ update-cache-results:
# As of 2023-07-31, this takes 5-20 minutes to run, depending on your machine.
small-test:
${MAKE} clean-test-cache clean
./run_small.sh -d
./run_small.sh --include_trivial_merges
${MAKE} small-test-diff

small-test-diff:
@echo
@echo "Here is the file content, in case a diff fails."
more results-small/*.csv results-small/merges/*.csv results-small/merges_valid/*.csv | cat
more results-small/*.csv results-small/merges/*.csv results-small/merges_compared/*.csv results-small/merges_tested/*.csv | cat
@echo
if grep -Fqvf results-small/merges/ez-vcard.csv test/small-goal-files/merges/ez-vcard.csv; then exit 1; fi
if grep -Fqvf results-small/merges/Algorithms.csv test/small-goal-files/merges/Algorithms.csv; then exit 1; fi
python3 test/remove-run_time-columns.py --input results-small/result.csv --output results-small/result-without-times.csv
python3 test/remove-run_time-columns.py --input results-small/filtered_result.csv --output results-small/filtered_result-without-times.csv
python3 test/check_equal_csv.py --actual_folder results-small/ --goal_folder test/small-goal-files/
@echo
diff -x tools -x defs.tex -x git -x merges -x .gitignore -x git -x result.csv -x plots -x filtered_result.csv -x table_run_time.tex -x .DS_Store -x '*~' -x '#*#' -r -U3 test/small-goal-files results-small
rm -f test/small-goal-files/result-without-times.txt results-small/result-without-times.txt
# Compare the goal tables against the freshly generated ones (table_run_time.tex is excluded from the comparison).
diff -x table_run_time.tex -r -U3 test/small-goal-files/tables/all results-small/tables/all

gradle-assemble:
./gradlew assemble -g ../.gradle/
Expand Down
82 changes: 41 additions & 41 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,22 @@

### Python

To install all the python requirements:
To install all the Python requirements, create a conda environment:

With mamba (faster https://github.com/mamba-org/mamba):

```bash
pip install -r requirements.txt
mamba env create -f environment.yml
mamba activate AST
```

### Alternative Python installation

If you don't want to mess with your local python installation you can create a python virtual environment to install all dependencies with the following commands:
With conda:

```bash
pip3 install virtualenv
python3 -m venv venv
source venv/bin/activate
conda env create -f environment.yml
conda activate AST
```

If you did the previous step make sure the virtual environemnt is activated when you use the repo (`source venv/bin/activate`).

### Maven

Make sure you use maven version 3.9.*.
Expand Down Expand Up @@ -68,9 +66,16 @@ make small-test

This runs the entire code on two small repos.
The output data appears in `results-small/`.
* `results-small/result.csv`: the final result
* `results-small/merges_small/` contains all the merges.
* `results-small/merges_small_valid/` contains all the merges and also records whether the parents of a merge pass tests.

* `results-small/result.csv`: the final result

* `results-small/merges/` contains all the merges.

* `results-small/merges_compared/` contains all merges and indicates whether the merge results are different and thus need to be analyzed.

* `results-small/merges_tested/` contains all merges that have been tested.

* `results-small/result.csv` contains the final result.

### Perform full analysis

Expand All @@ -90,13 +95,7 @@ This will run the entire code on all the repos and automatically decompress the
All the output data can be found in `results/`.
The final result is found in `results/result.csv`.
Directory `results/merges` contains all the merges for each repo.
Directory `results/merges_valid` contains all the merges and also stores if the parents of a merge pass tests.

To delete cache entries on failed merges, inconsistent merges, failed trivial merges and reexecute the stack multiple times over and over:

```bash
./run_full_restart.sh <n_repeat>
```
Directory `results/merges_tested` contains all the merges that have been tested.

To execute `run_full.sh` on multiple machines in parallel create a machine address list in `machines.txt` and run:

Expand Down Expand Up @@ -126,81 +125,82 @@ To run style checking run `make style`.

---

## Code structure

![alt text](illustrations/Architecture.drawio.png "Title")

## Directory structure

### Commited files
### Committed files

* run.sh -> This file executes each step of the stack.

* run_small.sh -> This file executes the stack on two repositories.

* run_full.sh -> This file executes the stack on all the repositories.

* run_full_restart.sh -> This file executes the stack and repeats failed merges, inconsistent merges and failed trivial multiple times.

* src/ -> contains the following scripts:

* python/ -> contains the following scripts:

* merge_tester.py -> Main file which performs merges and evaluates all the results across all projects.

* validate_repos.py -> Checks out all repos and removes all repos that fail their tests on main branch.
* test_repo_heads.py -> Checks out all repos and removes all repos that fail their tests on main branch.

* latex_output.py -> Output latex code for the resulting plots and table.

* test_parent_commits.py -> Tests if the parents of a commit pass their tests.
* merge_tools_comparator.py -> Compares merges that produce different output.

* get_repos.py -> Downloads the repos list.

* cache_merger.py -> Merges the current cache with the cache.tar
* cache_utils.py -> Contains functions to store and load the cache.

* delete_cache_entries.py -> Delete specific cache entries.
* clean_cache_placeholders.py -> Removes all the cache placeholders.

* delete_inconsistent_merge_results.py -> Delete inconsistent merge results.
* repo.py -> Contains the Repo class which represents a repo.

* delete_failed_trivial_merge_results.py -> Delete failed trivial merge results.
* split_repos.py -> Splits the repos for parallel execution.

* write_head_hashes.py -> Writes the head hashes of all repos to a file.

* scripts/ -> contains the following scripts:

* run_repo_tests.sh -> Runs a repo's programmer provided tests.

* merge_tools/ -> contains the following scripts:
* gitmerge.sh -> Executes git merge on a specific merge.
* intellimerge.sh -> Executes intellimerge on a specific merge.
* spork.sh -> Executes spork on a specific merge.
* merge_tools/ -> Contains all the merge tools scripts.

* utils/

* run_remotely.sh -> Runs the full stack on a remote machine.

* run_multiple_machine.sh -> Runs the full stack on multiple remote machines.

* src/main/java/astmergeevaluation/FindMergeCommits.java -> Finds all merge commits in a repo.

* input_data/ -> Input data, which is a list of repositories; see its README.md.

### Uncommited Files
### Uncommitted Files

* cache/ -> This folder is a cache for each computation. It contains:

* test_result/ -> Caches the test results for a specific commit. Used for parent testing and repo validation.

* merge_test_results/ -> Caches the test results for specific merges. Used for merge testing. First line indicates the merge result, second line indicates the runtime.
* merge_test_results/ -> Caches the test results for specific merges. Used for merge testing. First line indicates the merge result, second line indicates the run time.

* merge_diff_results/ -> Caches the diff results for specific merges.

* test_cache/ -> This folder is a cache for each test computation. contains:
* cache-small/ -> This folder is a cache for each test computation. It contains:

* test_result/ -> Caches the test results for a specific commit. Used for parent testing and repo validation.

* merge_test_results/ -> Caches the test results for specific merges. Used for merge testing. First line indicates the merge result, second line indicates the runtime.
* merge_test_results/ -> Caches the test results for specific merges. Used for merge testing. First line indicates the merge result, second line indicates the run time.

* .workdir/ -> This folder is used for the local computations of each process and contaent is named by Unix process (using "$$").
* .workdir/ -> This folder is used for the local computations of each process, and its content is named by Unix process ID (using "$$"). If `DELETE_WORKDIRS` is set to `false` in `src/python/repo.py`, this folder is not deleted after the computation and can be inspected.

* repos/ -> In this folder each repo is cloned.

* results/ -> Contains all the results for the full analysis.

* results-small/ -> Contains all the results for the small analysis.

* jars/ -> Location for the Intellimerge and Spork jars.

* scratch/ -> If STORE_SCRATCH is enabled in `merge_tester.py`, each merge will be stored in this location.
* jars/ -> Location for the IntelliMerge and Spork jars.
Binary file removed cache.tar
Binary file not shown.
33 changes: 33 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
name: AST
channels:
- defaults
dependencies:
- bzip2
- ca-certificates
- ld_impl_linux-64
- libffi
- libuuid
- ncurses
- openssl
- pip
- readline
- setuptools
- sqlite
- tk
- tzdata
- wheel
- xz
- zlib
- python=3.8
- pip:
- fasteners==0.18
- matplotlib==3.6.3
- numpy==1.23.5
- pandas==2.0.2
- pylint==2.17.5
- GitPython==3.1.31
- prettytable==3.8.0
- seaborn==0.12.2
- tqdm==4.64.1
- black==23.3.0
- psutil==5.9.5
Binary file added illustrations/Architecture.drawio.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions input_data/README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
This directory contains the input data, which is a list of repositories.
To refresh the contents of this directory, run script `get_repos.py`.

* repos.csv.gz -> The reaper dataset. (This is not committed to this repository; it is downloaded by `get_repos.py`.)
* repos.csv.gz -> The Reaper dataset. (This is not committed to this repository; it is downloaded by `get_repos.py`.)

* repos.csv -> List of all repos that fulfill the reaper selection criterion.
* repos.csv -> List of all repos that fulfill the Reaper selection criterion.

* repos_with_hashes.csv -> repos.csv but with the tested commit for repository validation

Expand Down
2 changes: 1 addition & 1 deletion input_data/repos.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
,repository,language,architecture,community,continuous_integration,documentation,history,issues,license,size,unit_test,stars,scorebased_org,randomforest_org,scorebased_utl,randomforest_utl
idx,repository,language,architecture,community,continuous_integration,documentation,history,issues,license,size,unit_test,stars,scorebased_org,randomforest_org,scorebased_utl,randomforest_utl
329,Complexible/stardog-spring,Java,1.0,1,0,0.36999,0.0,0.0,1,2153,0.403618,12,0,0,1,1
1502,sdeo/protobuf-socket-rpc,Java,1.0,1,0,0.233562,0.0,0.0,1,7472,0.323622,18,0,0,1,1
2865,twitter/netty-http2,Java,1.0,2,1,0.22231,0.368421,0.0,1,5886,0.341296,41,1,0,1,1
Expand Down
2 changes: 1 addition & 1 deletion input_data/repos_small.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
,repository,language,architecture,community,continuous_integration,documentation,history,issues,license,size,unit_test,stars,scorebased_org,randomforest_org,scorebased_utl,randomforest_utl
idx,repository,language,architecture,community,continuous_integration,documentation,history,issues,license,size,unit_test,stars,scorebased_org,randomforest_org,scorebased_utl,randomforest_utl
16594,mangstadt/ez-vcard,Java,1.0,1,0,0.433617,10.5,0.0,1,34146,0.448443,54,1,0,1,1
296895,pedrovgs/Algorithms,Java,0.932927,1,1,0.390942,89.0,0.75,1,6616,0.556371,1377,1,1,1,1
1624004,tntim96/JSCover,Java,0.985816,1,0,0.490264,25.0,6.448276,1,54612,0.627527,298,1,1,1,1
2 changes: 1 addition & 1 deletion input_data/repos_small_with_hashes.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
idx,repository,language,architecture,community,continuous_integration,documentation,history,issues,license,size,unit_test,stars,scorebased_org,randomforest_org,scorebased_utl,randomforest_utl,Validation hash
idx,repository,language,architecture,community,continuous_integration,documentation,history,issues,license,size,unit_test,stars,scorebased_org,randomforest_org,scorebased_utl,randomforest_utl,head hash
0,mangstadt/ez-vcard,Java,1.0,1,0,0.433617,10.5,0.0,1,34146,0.448443,54,1,0,1,1,95298de036f1df38fd7ca7e3225c4fd5ad5c44e6
1,pedrovgs/Algorithms,Java,0.932927,1,1,0.390942,89.0,0.75,1,6616,0.556371,1377,1,1,1,1,c108e3a4b4fa2556b8c88d7ab6803084b01a4364
1783,tntim96/JSCover,Java,0.985816,1,0,0.490264,25.0,6.448276,1,54612,0.627527,298,1,1,1,1,fff7a84ce12a037518e3c58e1d941a4d3dabb7e2
2 changes: 1 addition & 1 deletion input_data/repos_with_hashes.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
idx,repository,language,architecture,community,continuous_integration,documentation,history,issues,license,size,unit_test,stars,scorebased_org,randomforest_org,scorebased_utl,randomforest_utl,Validation hash
idx,repository,language,architecture,community,continuous_integration,documentation,history,issues,license,size,unit_test,stars,scorebased_org,randomforest_org,scorebased_utl,randomforest_utl,head hash
0,Complexible/stardog-spring,Java,1.0,1,0,0.36999,0.0,0.0,1,2153,0.403618,12,0,0,1,1,05ed950161bfa7a4cd23922eddd760a51cfc7089
1,sdeo/protobuf-socket-rpc,Java,1.0,1,0,0.233562,0.0,0.0,1,7472,0.323622,18,0,0,1,1,b1cf847b9922c2bc146a9eabf7c0df52428bf076
2,twitter/netty-http2,Java,1.0,2,1,0.22231,0.368421,0.0,1,5886,0.341296,41,1,0,1,1,e8df896db61045b118f3423196c41c11c162b1c0
Expand Down
Loading

0 comments on commit a8d61a2

Please sign in to comment.