Skip to content

Commit

Permalink
tiny cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
TobiObeck committed Nov 15, 2021
1 parent f283e8d commit 5378948
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/preprocessing/run_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,15 @@ def main():
if args.tokenize:
preprocessors.append(Tokenizer(args.tokenize_input, args.tokenize_input + SUFFIX_TOKENIZED))
if args.other:
preprocessors.append(NonEnglishRemover())
DROP_COLS = [
"id", "conversation_id", "created_at", "timezone", "user_id", "name", "place",
"replies_count", "retweets_count", "likes_count", "language",
# "cashtag": only a few records have this filled; it might be useless
# the columns below always have the same value for all records
"retweet", "near", "geo", "source", "user_rt_id", "user_rt", "retweet_id",
"retweet_date", "translate", "trans_src", 'trans_dest\r']

preprocessors.append(NonEnglishRemover())

preprocessors.append(ColumnDropper(DROP_COLS))

# call all preprocessing steps
Expand Down

0 comments on commit 5378948

Please sign in to comment.