Skip to content

Commit

Permalink
make prepare data script work
Browse files: browse the repository at this point in the history
  • Loading branch information
ferugit committed Mar 1, 2023
1 parent dd7b7a2 commit 1c6ed63
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions src/prepare_data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -96,7 +96,7 @@ def get_stratified_partition(dataframe, seed):
train_df, dev_df = split_dataset(dataframe, test_size=0.1, random_state=seed)
dev_df, low_quantity_data_df = check_low_quantity_labels(dev_df)
dev_df, test_df = split_dataset(dev_df, test_size=0.5, random_state=seed)
test_df = test_df.append(low_quantity_data_df)
test_df = pd.concat([test_df, low_quantity_data_df])
return train_df, dev_df, test_df


Expand Down Expand Up @@ -133,14 +133,14 @@ def get_classes_index(dataframe):
def main(args):

# Destination path
if(args.dst != ""):
output_path = args.dst
else:
output_path = os.path.join(args.src, 'metadata')
if (not os.path.isfile(args.src)) and (not os.path.isdir(args.dst)):
raise Exception('Non valid arguments!')

output_path = args.dst
check_path(output_path)

# Read tsv
df_path = os.path.join(args.src, 'metadata/donateacry.tsv')
df_path = os.path.join(args.src)
df = pd.read_csv(df_path, header=0, sep='\t')

# Get and save classes index
Expand Down Expand Up @@ -171,7 +171,7 @@ def main(args):

parser = argparse.ArgumentParser(description="Script to partitions for donateacry dataset")

parser.add_argument("--src", help="source directory", default="/home/fernandol/.gymnos/datasets/donateacry/donateacry_corpus_cleaned_and_updated_data/")
parser.add_argument("--src", help="source tsv with metadata", default="")
parser.add_argument("--dst", help="destination directory", default="")
parser.add_argument('--seed', type=int, default=0, help='partition seed')
args = parser.parse_args()
Expand Down

0 comments on commit 1c6ed63

Please sign in to comment.