-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8771e99
commit 1d71aa6
Showing
15 changed files
with
2,105 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ cache/ | |
artifacts/ | ||
artifacts.tar.gz | ||
*.hprof | ||
cache.tar | ||
|
||
output/ | ||
merge_repo/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/usr/bin/env bash

# usage: ./run_full.sh [-i <machine_id> -n <num_machines>] [-d]
# Runs the stack on all the repositories listed in input_data/repos_1000.csv.
# run.sh is invoked with "results-trivial-merges" as its second argument
# (presumably the output directory -- confirm against run.sh).
#   -i <machine_id>    optional: id of the current machine.
#   -n <num_machines>  optional: total number of machines used.
#   -d                 optional: whether to diff the merges.
# All command-line arguments are forwarded unchanged to run.sh.
# Warning: This takes days to run.

# Abort on the first failing command and on any use of an unset variable.
set -eu

# Restore the cache from its archive when only the archive is present.
if [[ -f cache.tar && ! -d cache ]]; then
    printf '%s\n' "Decompressing cache.tar"
    make decompress-cache
fi

./run.sh input_data/repos_1000.csv results-trivial-merges 20 cache "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
#!/usr/bin/env python3 | ||
""" Samples n_merges for each repository. | ||
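usage: python3 sample_merges.py --repos_head_passes_csv <path_to_repos_head_passes.csv> | ||
--merges_path <path_to_merges> | ||
--output_dir <output_dir> | ||
--n_merges <n_merges> (optional, default 100) | ||
--include_trivial_merges (optional) | ||
--only_trivial_merges (optional) | ||
This script samples at most n_merges merges for each repository, skipping | ||
repositories whose output file already exists. | ||
By default, trivial merges (those whose notes contain "a parent is the base") | ||
are excluded. | ||
If the flag --include_trivial_merges is set, then the script will also output | ||
merges that are trivial. | ||
If the flag --only_trivial_merges is set, then the script will only output | ||
trivial merges; this flag takes precedence over --include_trivial_merges. | ||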
""" | ||
|
||
import os | ||
import argparse | ||
from pathlib import Path | ||
import pandas as pd | ||
from tqdm import tqdm | ||
import numpy as np | ||
from cache_utils import slug_repo_name | ||
|
||
# Substring of the "notes" column that marks a merge as trivial.
TRIVIAL_MERGE_NOTE = "a parent is the base"


def select_merges(
    merges: pd.DataFrame,
    n_merges: int,
    *,
    include_trivial_merges: bool = False,
    only_trivial_merges: bool = False,
) -> pd.DataFrame:
    """Filter merges by triviality and return a reproducible sample.

    Args:
        merges: Merge table with a "notes" column. It is not modified.
        n_merges: Maximum number of rows to sample.
        include_trivial_merges: Keep trivial merges alongside the others.
        only_trivial_merges: Keep only trivial merges. Takes precedence over
            ``include_trivial_merges``.

    Returns:
        At most ``n_merges`` rows, sampled with a fixed seed and sorted by
        index, with missing notes replaced by the empty string.
    """
    # Assign the filled column explicitly: an in-place replace() on the
    # selection merges["notes"] is a chained mutation, which warns on
    # pandas 2.x and is a no-op under Copy-on-Write.
    notes = merges["notes"].fillna("")
    merges = merges.assign(notes=notes)

    # The marker is a plain substring, so skip regex interpretation.
    is_trivial = notes.str.contains(TRIVIAL_MERGE_NOTE, regex=False)
    if only_trivial_merges:
        merges = merges[is_trivial]
    elif not include_trivial_merges:
        merges = merges[~is_trivial]

    sample_size = min(len(merges), n_merges)
    # Fixed seed keeps the sample identical across runs.
    return merges.sample(sample_size, random_state=42).sort_index()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--repos_head_passes_csv", type=Path, required=True)
    parser.add_argument("--merges_path", type=Path, required=True)
    parser.add_argument("--output_dir", type=Path, required=True)
    parser.add_argument("--n_merges", type=int, default=100)
    parser.add_argument("--include_trivial_merges", action="store_true")
    parser.add_argument("--only_trivial_merges", action="store_true")
    args = parser.parse_args()

    repos = pd.read_csv(args.repos_head_passes_csv, index_col="idx")
    args.output_dir.mkdir(parents=True, exist_ok=True)

    for _, repository_data in tqdm(repos.iterrows(), total=len(repos)):
        repo_slug = repository_data["repository"]
        csv_name = slug_repo_name(repo_slug) + ".csv"
        merge_list_file = args.merges_path / csv_name
        output_file = args.output_dir / csv_name

        if not merge_list_file.exists():
            print(
                "sample_merges:",
                repo_slug,
                "does not have a list of merges. Missing file: ",
                merge_list_file,
            )
            continue

        # An existing output file means this repository was already sampled.
        if output_file.exists():
            print(
                "sample_merges: Skipping",
                repo_slug,
                "because it is already computed.",
            )
            continue

        try:
            merges = pd.read_csv(merge_list_file, header=0, index_col="idx")
        except pd.errors.EmptyDataError:
            print(
                "sample_merges: Skipping",
                repo_slug,
                "because it does not contain any merges.",
            )
            continue

        sample = select_merges(
            merges,
            args.n_merges,
            include_trivial_merges=args.include_trivial_merges,
            only_trivial_merges=args.only_trivial_merges,
        )
        sample.to_csv(output_file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters